From 38e27cbb6b21b4fc4c01661271159938eb5fdcaf Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:40:33 -0400 Subject: [PATCH 01/32] Upload all package files The repository only had the pygad.py script and some few other scripts. Other scripts in the library exist in these repositories: 1) https://github.com/ahmedfgad/NumPyANN 2) https://github.com/ahmedfgad/NumPyCNN 3) https://github.com/ahmedfgad/NeuralGenetic 4) https://github.com/ahmedfgad/CNNGenetic 5) https://github.com/ahmedfgad/KerasGA 6) https://github.com/ahmedfgad/TorchGA Now, the pygad directory has all the scripts in the library. --- pygad/__init__.py | 3 + pygad/cnn/__init__.py | 4 + pygad/cnn/cnn.py | 849 ++++++++++++ pygad/gacnn/__init__.py | 4 + pygad/gacnn/gacnn.py | 97 ++ pygad/gann/__init__.py | 4 + pygad/gann/gann.py | 269 ++++ pygad/helper/__init__.py | 3 + pygad/helper/unique.py | 638 +++++++++ pygad/kerasga/__init__.py | 3 + pygad/kerasga/kerasga.py | 89 ++ pygad/nn/__init__.py | 4 + pygad/nn/nn.py | 399 ++++++ pygad/pygad.py | 2219 +++++++++++++++++++++++++++++++ pygad/torchga/__init__.py | 3 + pygad/torchga/torchga.py | 90 ++ pygad/utils/__init__.py | 5 + pygad/utils/crossover.py | 248 ++++ pygad/utils/mutation.py | 822 ++++++++++++ pygad/utils/parent_selection.py | 191 +++ pygad/visualize/__init__.py | 3 + pygad/visualize/plot.py | 348 +++++ 22 files changed, 6295 insertions(+) create mode 100644 pygad/__init__.py create mode 100644 pygad/cnn/__init__.py create mode 100644 pygad/cnn/cnn.py create mode 100644 pygad/gacnn/__init__.py create mode 100644 pygad/gacnn/gacnn.py create mode 100644 pygad/gann/__init__.py create mode 100644 pygad/gann/gann.py create mode 100644 pygad/helper/__init__.py create mode 100644 pygad/helper/unique.py create mode 100644 pygad/kerasga/__init__.py create mode 100644 pygad/kerasga/kerasga.py create mode 100644 pygad/nn/__init__.py create mode 100644 pygad/nn/nn.py create mode 100644 pygad/pygad.py create mode 100644 pygad/torchga/__init__.py 
create mode 100644 pygad/torchga/torchga.py create mode 100644 pygad/utils/__init__.py create mode 100644 pygad/utils/crossover.py create mode 100644 pygad/utils/mutation.py create mode 100644 pygad/utils/parent_selection.py create mode 100644 pygad/visualize/__init__.py create mode 100644 pygad/visualize/plot.py diff --git a/pygad/__init__.py b/pygad/__init__.py new file mode 100644 index 0000000..71f207b --- /dev/null +++ b/pygad/__init__.py @@ -0,0 +1,3 @@ +from .pygad import * # Relative import. + +__version__ = "3.0.0" diff --git a/pygad/cnn/__init__.py b/pygad/cnn/__init__.py new file mode 100644 index 0000000..57e6d6f --- /dev/null +++ b/pygad/cnn/__init__.py @@ -0,0 +1,4 @@ +from .cnn import * + +__version__ = "1.0.0" + diff --git a/pygad/cnn/cnn.py b/pygad/cnn/cnn.py new file mode 100644 index 0000000..0e78a52 --- /dev/null +++ b/pygad/cnn/cnn.py @@ -0,0 +1,849 @@ +import numpy +import functools + +""" +Convolutional neural network implementation using NumPy +A tutorial that helps to get started (Building Convolutional Neural Network using NumPy from Scratch) available in these links: + https://www.linkedin.com/pulse/building-convolutional-neural-network-using-numpy-from-ahmed-gad + https://towardsdatascience.com/building-convolutional-neural-network-using-numpy-from-scratch-b30aac50e50a + https://www.kdnuggets.com/2018/04/building-convolutional-neural-network-numpy-scratch.html +It is also translated into Chinese: http://m.aliyun.com/yunqi/articles/585741 +""" + +# Supported activation functions by the cnn.py module. +supported_activation_functions = ("sigmoid", "relu", "softmax") + +def sigmoid(sop): + + """ + Applies the sigmoid function. + + sop: The input to which the sigmoid function is applied. + + Returns the result of the sigmoid function. + """ + + if type(sop) in [list, tuple]: + sop = numpy.array(sop) + + return 1.0 / (1 + numpy.exp(-1 * sop)) + +def relu(sop): + + """ + Applies the rectified linear unit (ReLU) function. 
+ + sop: The input to which the relu function is applied. + + Returns the result of the ReLU function. + """ + + if not (type(sop) in [list, tuple, numpy.ndarray]): + if sop < 0: + return 0 + else: + return sop + elif type(sop) in [list, tuple]: + sop = numpy.array(sop) + + result = sop + result[sop < 0] = 0 + + return result + +def softmax(layer_outputs): + + """ + Applies the sotmax function. + + sop: The input to which the softmax function is applied. + + Returns the result of the softmax function. + """ + return layer_outputs / (numpy.sum(layer_outputs) + 0.000001) + +def layers_weights(model, initial=True): + + """ + Creates a list holding the weights of all layers in the CNN. + + model: A reference to the instance from the cnn.Model class. + initial: When True, the function returns the initial weights of the layers. When False, the trained weights of the layers are returned. The initial weights are only needed before network training starts. The trained weights are needed to predict the network outputs. + + Returns a list (network_weights) holding the weights of the layers in the CNN. + """ + + network_weights = [] + + layer = model.last_layer + while "previous_layer" in layer.__init__.__code__.co_varnames: + if type(layer) in [Conv2D, Dense]: + # If the 'initial' parameter is True, append the initial weights. Otherwise, append the trained weights. + if initial == True: + network_weights.append(layer.initial_weights) + elif initial == False: + network_weights.append(layer.trained_weights) + else: + raise ValueError("Unexpected value to the 'initial' parameter: {initial}.".format(initial=initial)) + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the Input2D class), raise an error. 
+ if not (type(layer) is Input2D): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. + # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). + network_weights.reverse() + return numpy.array(network_weights) + +def layers_weights_as_matrix(model, vector_weights): + + """ + Converts the network weights from vectors to matrices. + + model: A reference to the instance from the cnn.Model class. + vector_weights: The network weights as vectors where the weights of each layer form a single vector. + + Returns a list (network_weights) holding the weights of the CNN layers as matrices. + """ + + network_weights = [] + + start = 0 + layer = model.last_layer + vector_weights = vector_weights[::-1] + while "previous_layer" in layer.__init__.__code__.co_varnames: + if type(layer) in [Conv2D, Dense]: + layer_weights_shape = layer.initial_weights.shape + layer_weights_size = layer.initial_weights.size + + weights_vector=vector_weights[start:start + layer_weights_size] + # matrix = pygad.nn.DenseLayer.to_array(vector=weights_vector, shape=layer_weights_shape) + matrix = numpy.reshape(weights_vector, newshape=(layer_weights_shape)) + network_weights.append(matrix) + + start = start + layer_weights_size + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the Input2D class), raise an error. + if not (type(layer) is Input2D): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. 
In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. + # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). + network_weights.reverse() + return numpy.array(network_weights) + +def layers_weights_as_vector(model, initial=True): + + """ + Creates a list holding the weights of each layer (Conv and Dense) in the CNN as a vector. + + model: A reference to the instance from the cnn.Model class. + initial: When True, the function returns the initial weights of the CNN. When False, the trained weights of the CNN layers are returned. The initial weights are only needed before network training starts. The trained weights are needed to predict the network outputs. + + Returns a list (network_weights) holding the weights of the CNN layers as a vector. + """ + + network_weights = [] + + layer = model.last_layer + while "previous_layer" in layer.__init__.__code__.co_varnames: + if type(layer) in [Conv2D, Dense]: + # If the 'initial' parameter is True, append the initial weights. Otherwise, append the trained weights. + if initial == True: + vector = numpy.reshape(layer.initial_weights, newshape=(layer.initial_weights.size)) + # vector = pygad.nn.DenseLayer.to_vector(matrix=layer.initial_weights) + network_weights.extend(vector) + elif initial == False: + vector = numpy.reshape(layer.trained_weights, newshape=(layer.trained_weights.size)) + # vector = pygad.nn.DenseLayer.to_vector(array=layer.trained_weights) + network_weights.extend(vector) + else: + raise ValueError("Unexpected value to the 'initial' parameter: {initial}.".format(initial=initial)) + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the Input2D class), raise an error. 
+ if not (type(layer) is Input2D): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. + # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). + network_weights.reverse() + return numpy.array(network_weights) + +def update_layers_trained_weights(model, final_weights): + + """ + After the network weights are trained, the 'trained_weights' attribute of each layer is updated by the weights calculated after passing all the epochs (such weights are passed in the 'final_weights' parameter). + By just passing a reference to the last layer in the network (i.e. output layer) in addition to the final weights, this function updates the 'trained_weights' attribute of all layers. + + model: A reference to the instance from the cnn.Model class. + final_weights: An array of layers weights as matrices after passing through all the epochs. + """ + + layer = model.last_layer + layer_idx = len(final_weights) - 1 + while "previous_layer" in layer.__init__.__code__.co_varnames: + if type(layer) in [Conv2D, Dense]: + layer.trained_weights = final_weights[layer_idx] + + layer_idx = layer_idx - 1 + + # Go to the previous layer. + layer = layer.previous_layer + +class Input2D: + + """ + Implementing the input layer of a CNN. + The CNN architecture must start with an input layer. + """ + + def __init__(self, input_shape): + + """ + input_shape: Shape of the input sample to the CNN. + """ + + # If the input sample has less than 2 dimensions, then an exception is raised. 
+ if len(input_shape) < 2: + raise ValueError("The Input2D class creates an input layer for data inputs with at least 2 dimensions but ({num_dim}) dimensions found.".format(num_dim=len(input_shape))) + # If the input sample has exactly 2 dimensions, the third dimension is set to 1. + elif len(input_shape) == 2: + input_shape = (input_shape[0], input_shape[1], 1) + + for dim_idx, dim in enumerate(input_shape): + if dim <= 0: + raise ValueError("The dimension size of the inputs cannot be <= 0. Please pass a valid value to the 'input_size' parameter.") + + self.input_shape = input_shape # Shape of the input sample. + self.layer_output_size = input_shape # Shape of the output from the current layer. For an input layer, it is the same as the shape of the input sample. + +class Conv2D: + + """ + Implementing the convolution layer. + """ + + def __init__(self, num_filters, kernel_size, previous_layer, activation_function=None): + + """ + num_filters: Number of filters in the convolution layer. + kernel_size: Kernel size of the filter. + previous_layer: A reference to the previous layer. + activation_function=None: The name of the activation function to be used in the conv layer. If None, then no activation function is applied besides the convolution operation. The activation function can be applied by a separate layer. + """ + + if num_filters <= 0: + raise ValueError("Number of filters cannot be <= 0. Please pass a valid value to the 'num_filters' parameter.") + # Number of filters in the conv layer. + self.num_filters = num_filters + + if kernel_size <= 0: + raise ValueError("The kernel size cannot be <= 0. Please pass a valid value to the 'kernel_size' parameter.") + # Kernel size of each filter. 
+ self.kernel_size = kernel_size + + # Validating the activation function + if (activation_function is None): + self.activation = None + elif (activation_function == "relu"): + self.activation = relu + elif (activation_function == "sigmoid"): + self.activation = sigmoid + elif (activation_function == "softmax"): + raise ValueError("The softmax activation function cannot be used in a conv layer.") + else: + raise ValueError("The specified activation function '{activation_function}' is not among the supported activation functions {supported_activation_functions}. Please use one of the supported functions.".format(activation_function=activation_function, supported_activation_functions=supported_activation_functions)) + + # The activation function used in the current layer. + self.activation_function = activation_function + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # A reference to the bank of filters. + self.filter_bank_size = (self.num_filters, + self.kernel_size, + self.kernel_size, + self.previous_layer.layer_output_size[-1]) + + # Initializing the filters of the conv layer. + self.initial_weights = numpy.random.uniform(low=-0.1, + high=0.1, + size=self.filter_bank_size) + + # The trained filters of the conv layer. Only assigned a value after the network is trained (i.e. the train_network() function completes). + # Just initialized to be equal to the initial filters + self.trained_weights = self.initial_weights.copy() + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. 
+ # Later, it must conider strides and paddings + self.layer_output_size = (self.previous_layer.layer_output_size[0] - self.kernel_size + 1, + self.previous_layer.layer_output_size[1] - self.kernel_size + 1, + num_filters) + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def conv_(self, input2D, conv_filter): + + """ + Convolves the input (input2D) by a single filter (conv_filter). + + input2D: The input to be convolved by a single filter. + conv_filter: The filter convolving the input. + + Returns the result of convolution. + """ + + result = numpy.zeros(shape=(input2D.shape[0], input2D.shape[1], conv_filter.shape[0])) + # Looping through the image to apply the convolution operation. + for r in numpy.uint16(numpy.arange(self.filter_bank_size[1]/2.0, + input2D.shape[0]-self.filter_bank_size[1]/2.0+1)): + for c in numpy.uint16(numpy.arange(self.filter_bank_size[1]/2.0, + input2D.shape[1]-self.filter_bank_size[1]/2.0+1)): + """ + Getting the current region to get multiplied with the filter. + How to loop through the image and get the region based on + the image and filer sizes is the most tricky part of convolution. + """ + if len(input2D.shape) == 2: + curr_region = input2D[r-numpy.uint16(numpy.floor(self.filter_bank_size[1]/2.0)):r+numpy.uint16(numpy.ceil(self.filter_bank_size[1]/2.0)), + c-numpy.uint16(numpy.floor(self.filter_bank_size[1]/2.0)):c+numpy.uint16(numpy.ceil(self.filter_bank_size[1]/2.0))] + else: + curr_region = input2D[r-numpy.uint16(numpy.floor(self.filter_bank_size[1]/2.0)):r+numpy.uint16(numpy.ceil(self.filter_bank_size[1]/2.0)), + c-numpy.uint16(numpy.floor(self.filter_bank_size[1]/2.0)):c+numpy.uint16(numpy.ceil(self.filter_bank_size[1]/2.0)), :] + # Element-wise multipliplication between the current region and the filter. 
+ + for filter_idx in range(conv_filter.shape[0]): + curr_result = curr_region * conv_filter[filter_idx] + conv_sum = numpy.sum(curr_result) # Summing the result of multiplication. + + if self.activation is None: + result[r, c, filter_idx] = conv_sum # Saving the SOP in the convolution layer feature map. + else: + result[r, c, filter_idx] = self.activation(conv_sum) # Saving the activation function result in the convolution layer feature map. + + # Clipping the outliers of the result matrix. + final_result = result[numpy.uint16(self.filter_bank_size[1]/2.0):result.shape[0]-numpy.uint16(self.filter_bank_size[1]/2.0), + numpy.uint16(self.filter_bank_size[1]/2.0):result.shape[1]-numpy.uint16(self.filter_bank_size[1]/2.0), :] + return final_result + + def conv(self, input2D): + + """ + Convolves the input (input2D) by a filter bank. + + input2D: The input to be convolved by the filter bank. + + The conv() method saves the result of convolving the input by the filter bank in the layer_output attribute. + """ + + if len(input2D.shape) != len(self.initial_weights.shape) - 1: # Check if there is a match in the number of dimensions between the image and the filters. + raise ValueError("Number of dimensions in the conv filter and the input do not match.") + if len(input2D.shape) > 2 or len(self.initial_weights.shape) > 3: # Check if number of image channels matches the filter depth. + if input2D.shape[-1] != self.initial_weights.shape[-1]: + raise ValueError("Number of channels in both the input and the filter must match.") + if self.initial_weights.shape[1] != self.initial_weights.shape[2]: # Check if filter dimensions are equal. + raise ValueError('A filter must be a square matrix. I.e. number of rows and columns must match.') + if self.initial_weights.shape[1]%2==0: # Check if filter diemnsions are odd. + raise ValueError('A filter must have an odd size. I.e. 
number of rows and columns must be odd.') + + self.layer_output = self.conv_(input2D, self.trained_weights) + +class AveragePooling2D: + + """ + Implementing the average pooling layer. + """ + + def __init__(self, pool_size, previous_layer, stride=2): + + """ + pool_size: Pool size. + previous_layer: Reference to the previous layer in the CNN architecture. + stride=2: Stride + """ + + if not (type(pool_size) is int): + raise ValueError("The expected type of the pool_size is int but {pool_size_type} found.".format(pool_size_type=type(pool_size))) + + if pool_size <= 0: + raise ValueError("The passed value to the pool_size parameter cannot be <= 0.") + self.pool_size = pool_size + + if stride <= 0: + raise ValueError("The passed value to the stride parameter cannot be <= 0.") + self.stride = stride + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. + self.layer_output_size = (numpy.uint16((self.previous_layer.layer_output_size[0] - self.pool_size + 1)/stride + 1), + numpy.uint16((self.previous_layer.layer_output_size[1] - self.pool_size + 1)/stride + 1), + self.previous_layer.layer_output_size[-1]) + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def average_pooling(self, input2D): + + """ + Applies the average pooling operation. + + input2D: The input to which the average pooling operation is applied. + + The average_pooling() method saves its result in the layer_output attribute. + """ + + # Preparing the output of the pooling operation. 
+ pool_out = numpy.zeros((numpy.uint16((input2D.shape[0]-self.pool_size+1)/self.stride+1), + numpy.uint16((input2D.shape[1]-self.pool_size+1)/self.stride+1), + input2D.shape[-1])) + for map_num in range(input2D.shape[-1]): + r2 = 0 + for r in numpy.arange(0,input2D.shape[0]-self.pool_size+1, self.stride): + c2 = 0 + for c in numpy.arange(0, input2D.shape[1]-self.pool_size+1, self.stride): + pool_out[r2, c2, map_num] = numpy.mean([input2D[r:r+self.pool_size, c:c+self.pool_size, map_num]]) + c2 = c2 + 1 + r2 = r2 +1 + + self.layer_output = pool_out + +class MaxPooling2D: + + """ + Similar to the AveragePooling2D class except that it implements max pooling. + """ + + def __init__(self, pool_size, previous_layer, stride=2): + + """ + pool_size: Pool size. + previous_layer: Reference to the previous layer in the CNN architecture. + stride=2: Stride + """ + + if not (type(pool_size) is int): + raise ValueError("The expected type of the pool_size is int but {pool_size_type} found.".format(pool_size_type=type(pool_size))) + + if pool_size <= 0: + raise ValueError("The passed value to the pool_size parameter cannot be <= 0.") + self.pool_size = pool_size + + if stride <= 0: + raise ValueError("The passed value to the stride parameter cannot be <= 0.") + self.stride = stride + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. 
+ self.layer_output_size = (numpy.uint16((self.previous_layer.layer_output_size[0] - self.pool_size + 1)/stride + 1), + numpy.uint16((self.previous_layer.layer_output_size[1] - self.pool_size + 1)/stride + 1), + self.previous_layer.layer_output_size[-1]) + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def max_pooling(self, input2D): + + """ + Applies the max pooling operation. + + input2D: The input to which the max pooling operation is applied. + + The max_pooling() method saves its result in the layer_output attribute. + """ + + # Preparing the output of the pooling operation. + pool_out = numpy.zeros((numpy.uint16((input2D.shape[0]-self.pool_size+1)/self.stride+1), + numpy.uint16((input2D.shape[1]-self.pool_size+1)/self.stride+1), + input2D.shape[-1])) + for map_num in range(input2D.shape[-1]): + r2 = 0 + for r in numpy.arange(0,input2D.shape[0]-self.pool_size+1, self.stride): + c2 = 0 + for c in numpy.arange(0, input2D.shape[1]-self.pool_size+1, self.stride): + pool_out[r2, c2, map_num] = numpy.max([input2D[r:r+self.pool_size, c:c+self.pool_size, map_num]]) + c2 = c2 + 1 + r2 = r2 +1 + + self.layer_output = pool_out + +class ReLU: + + """ + Implementing the ReLU layer. + """ + + def __init__(self, previous_layer): + + """ + previous_layer: Reference to the previous layer. + """ + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. + self.layer_output_size = self.previous_layer.layer_output_size + + # The layer_output attribute holds the latest output from the layer. 
+ self.layer_output = None + + def relu_layer(self, layer_input): + + """ + Applies the ReLU function over all elements in input to the ReLU layer. + + layer_input: The input to which the ReLU function is applied. + + The relu_layer() method saves its result in the layer_output attribute. + """ + + self.layer_output_size = layer_input.size + self.layer_output = relu(layer_input) + +class Sigmoid: + + """ + Implementing the sigmoid layer. + """ + + def __init__(self, previous_layer): + + """ + previous_layer: Reference to the previous layer. + """ + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. + self.layer_output_size = self.previous_layer.layer_output_size + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def sigmoid_layer(self, layer_input): + + """ + Applies the sigmoid function over all elements in input to the sigmoid layer. + + layer_input: The input to which the sigmoid function is applied. + + The sigmoid_layer() method saves its result in the layer_output attribute. + """ + + self.layer_output_size = layer_input.size + self.layer_output = sigmoid(layer_input) + +class Flatten: + + """ + Implementing the flatten layer. + """ + + def __init__(self, previous_layer): + + """ + previous_layer: Reference to the previous layer. + """ + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. 
+ self.previous_layer = previous_layer + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. + self.layer_output_size = functools.reduce(lambda x, y: x*y, self.previous_layer.layer_output_size) + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def flatten(self, input2D): + + """ + Reshapes the input into a 1D vector. + + input2D: The input to the Flatten layer that will be converted into a 1D vector. + + The flatten() method saves its result in the layer_output attribute. + """ + + self.layer_output_size = input2D.size + self.layer_output = numpy.ravel(input2D) + +class Dense: + + """ + Implementing the input dense (fully connected) layer of a CNN. + """ + + def __init__(self, num_neurons, previous_layer, activation_function="relu"): + + """ + num_neurons: Number of neurons in the dense layer. + previous_layer: Reference to the previous layer. + activation_function: Name of the activation function to be used in the current layer. + """ + + if num_neurons <= 0: + raise ValueError("Number of neurons cannot be <= 0. Please pass a valid value to the 'num_neurons' parameter.") + + # Number of neurons in the dense layer. + self.num_neurons = num_neurons + + # Validating the activation function + if (activation_function == "relu"): + self.activation = relu + elif (activation_function == "sigmoid"): + self.activation = sigmoid + elif (activation_function == "softmax"): + self.activation = softmax + else: + raise ValueError("The specified activation function '{activation_function}' is not among the supported activation functions {supported_activation_functions}. 
Please use one of the supported functions.".format(activation_function=activation_function, supported_activation_functions=supported_activation_functions)) + + self.activation_function = activation_function + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + if type(self.previous_layer.layer_output_size) in [list, tuple, numpy.ndarray] and len(self.previous_layer.layer_output_size) > 1: + raise ValueError("The input to the dense layer must be of type int but {sh} found.".format(sh=type(self.previous_layer.layer_output_size))) + # Initializing the weights of the layer. + self.initial_weights = numpy.random.uniform(low=-0.1, + high=0.1, + size=(self.previous_layer.layer_output_size, self.num_neurons)) + + # The trained weights of the layer. Only assigned a value after the network is trained (i.e. the train_network() function completes). + # Just initialized to be equal to the initial weights + self.trained_weights = self.initial_weights.copy() + + # Size of the input to the layer. + self.layer_input_size = self.previous_layer.layer_output_size + + # Size of the output from the layer. + self.layer_output_size = num_neurons + + # The layer_output attribute holds the latest output from the layer. + self.layer_output = None + + def dense_layer(self, layer_input): + + """ + Calculates the output of the dense layer. + + layer_input: The input to the dense layer + + The dense_layer() method saves its result in the layer_output attribute. + """ + + if self.trained_weights is None: + raise TypeError("The weights of the dense layer cannot be of Type 'None'.") + + sop = numpy.matmul(layer_input, self.trained_weights) + + self.layer_output = self.activation(sop) + +class Model: + + """ + Creating a CNN model. 
def __init__(self, last_layer, epochs=10, learning_rate=0.01):
    """
    Stores the training settings and collects the layers of the CNN.

    last_layer: A reference to the last layer in the CNN architecture.
    epochs=10: Number of epochs.
    learning_rate=0.01: Learning rate.
    """

    self.last_layer = last_layer
    self.epochs = epochs
    self.learning_rate = learning_rate

    # The network_layers attribute is a list holding references to all CNN layers.
    self.network_layers = self.get_layers()


def get_layers(self):
    """
    Prepares a list of all layers in the CNN model.

    The layers are collected by walking backwards from self.last_layer
    through the previous_layer references.

    Returns the list ordered from the first collected layer to the last layer.
    """

    collected = []

    # Start from the last layer in the network architecture.
    layer = self.last_layer

    # A layer is collected only while its constructor accepts a
    # 'previous_layer' argument. The first layer whose constructor does not
    # accept it ends the walk and is not added to the list.
    while "previous_layer" in layer.__init__.__code__.co_varnames:
        collected.append(layer)
        layer = layer.previous_layer

    # The walk went from the last layer backwards, so restore forward order.
    collected.reverse()
    return collected


def train(self, train_inputs, train_outputs):
    """
    Trains the CNN model.
    It is important to note that no learning algorithm is used for training the CNN. Just the learning rate is used for making some changes which is better than leaving the weights unchanged.

    The absolute label error is summed over the samples of one epoch and is
    passed to update_weights() at the end of that epoch. The sum starts from
    zero at every epoch so that update_weights() receives the error of the
    last epoch only.

    train_inputs: Training data inputs. Must have 4 dimensions.
    train_outputs: Training data outputs. Its length must equal the number of input samples.

    Raises ValueError if train_inputs does not have 4 dimensions or if the number of samples and labels differ.
    Raises IndexError if no predicted label can be extracted from the output of the last layer.
    """

    if train_inputs.ndim != 4:
        raise ValueError(("The training data input has {num_dims} but it must have 4 dimensions. "
                          "The first dimension is the number of training samples, the second & third dimensions represent the width and height of the sample, and the fourth dimension represents the number of channels in the sample.").format(num_dims=train_inputs.ndim))

    if train_inputs.shape[0] != len(train_outputs):
        raise ValueError("Mismatch between the number of input samples and number of labels: {num_samples_inputs} != {num_samples_outputs}.".format(num_samples_inputs=train_inputs.shape[0], num_samples_outputs=len(train_outputs)))

    for epoch in range(self.epochs):
        print("Epoch {epoch}".format(epoch=epoch))

        # Error of the current epoch only.
        network_error = 0

        for sample_idx in range(train_inputs.shape[0]):
            self.feed_sample(train_inputs[sample_idx, :])

            layer_output = self.last_layer.layer_output
            try:
                # First index (along the first axis) holding the maximum output.
                predicted_label = numpy.where(numpy.max(layer_output) == layer_output)[0][0]
            except IndexError:
                # No element compared equal to the maximum. Show the output to help debugging.
                print(layer_output)
                raise IndexError("Index out of range")

            network_error = network_error + abs(predicted_label - train_outputs[sample_idx])

        self.update_weights(network_error)


def feed_sample(self, sample):
    """
    Feeds a sample in the CNN layers.

    Each layer in self.network_layers processes the output of the layer before it.

    sample: The sample to be fed to the CNN layers.

    Returns results of the last layer in the CNN.
    Raises TypeError if a layer of an unsupported type is found.
    """

    last_layer_outputs = sample
    for layer in self.network_layers:
        # The exact type is compared (not isinstance) to select the method to call.
        layer_type = type(layer)
        if layer_type is Conv2D:
            layer.conv(input2D=last_layer_outputs)
        elif layer_type is Dense:
            layer.dense_layer(layer_input=last_layer_outputs)
        elif layer_type is MaxPooling2D:
            layer.max_pooling(input2D=last_layer_outputs)
        elif layer_type is AveragePooling2D:
            layer.average_pooling(input2D=last_layer_outputs)
        elif layer_type is ReLU:
            layer.relu_layer(layer_input=last_layer_outputs)
        elif layer_type is Sigmoid:
            layer.sigmoid_layer(layer_input=last_layer_outputs)
        elif layer_type is Flatten:
            layer.flatten(input2D=last_layer_outputs)
        elif layer_type is Input2D:
            pass
        else:
            raise TypeError("The layer of type {layer_type} is not supported yet.".format(layer_type=type(layer)))

        # The output of this layer is the input of the next one.
        last_layer_outputs = layer.layer_output

    return self.network_layers[-1].layer_output


def update_weights(self, network_error):
    """
    Updates the weights of the CNN.
    It is important to note that no learning algorithm is used for training the CNN. Just the learning rate is used for making some changes which is better than leaving the weights unchanged.

    This method loops through the layers and updates their weights.

    network_error: The network error in the last epoch.
    """

    for layer in self.network_layers:
        # Only the layers that own a 'trained_weights' attribute are updated.
        if "trained_weights" in vars(layer):
            layer.trained_weights = layer.trained_weights - network_error * self.learning_rate * layer.trained_weights


def predict(self, data_inputs):
    """
    Uses the trained CNN for making predictions.

    data_inputs: The inputs to predict their label. Must have 4 dimensions.

    Returns a list holding the samples predictions.
    Raises ValueError if data_inputs does not have 4 dimensions.
    """

    if data_inputs.ndim != 4:
        raise ValueError(("The data input has {num_dims} but it must have 4 dimensions. "
                          "The first dimension is the number of training samples, the second & third dimensions represent the width and height of the sample, and the fourth dimension represents the number of channels in the sample.").format(num_dims=data_inputs.ndim))

    predictions = []
    for sample in data_inputs:
        probs = self.feed_sample(sample=sample)
        # First index (along the first axis) holding the maximum output.
        predicted_label = numpy.where(numpy.max(probs) == probs)[0][0]
        predictions.append(predicted_label)
    return predictions


def summary(self):
    """
    Prints a summary of the CNN architecture.
    """

    print("\n----------Network Architecture----------")
    for layer in self.network_layers:
        print(type(layer))
    print("----------------------------------------\n")
def population_as_vectors(population_networks):
    """
    Converts every network in the population into a single weights vector.

    For each solution (i.e. CNN) the weights of all its layers are fetched as one vector using cnn.layers_weights_as_vector(). A population of 6 networks gives a list of 6 vectors.

    population_networks: A list holding references to the CNN models used in the population.

    Returns a list holding the weights vectors for all solutions (i.e. networks).
    """

    # One vector per network, in the same order as the population.
    return [cnn.layers_weights_as_vector(network) for network in population_networks]


def population_as_matrices(population_networks, population_vectors):
    """
    Converts the weights vector of every network in the population back into matrices.

    Each network is paired with its weights vector and the pair is passed to cnn.layers_weights_as_matrix(). A population of 6 networks gives a list of 6 results.

    population_networks: A list holding references to the CNN models used in the population.
    population_vectors: A list holding the weights of all networks as vectors. Such vectors are to be converted into matrices.

    Returns a list holding the weights matrices for all solutions (i.e. networks).
    """

    # The networks and the vectors are walked in lockstep.
    return [cnn.layers_weights_as_matrix(network, weights_vector)
            for network, weights_vector in zip(population_networks, population_vectors)]


class GACNN:

    def __init__(self, model, num_solutions):
        """
        Creates an instance of the GACNN class for training a CNN using the genetic algorithm.
        The initial population is built by the create_population() method and is saved in the population_networks attribute.

        model: An instance of the pygad.cnn.Model class representing the architecture of all solutions in the population.
        num_solutions: Number of CNNs (i.e. solutions) in the population. This number of copies of the model is created.
        """

        self.model = model
        self.num_solutions = num_solutions

        # A list holding references to all the solutions (i.e. CNNs) used in the population.
        self.population_networks = self.create_population()

    def create_population(self):
        """
        Creates the initial population of the genetic algorithm as a list of CNNs (i.e. solutions).
        Each element in the list is an independent deep copy of the model saved in the instance.

        The method returns the list holding the references to the CNN models.
        """

        # Deep copies are used so that the solutions do not share their weights.
        return [copy.deepcopy(self.model) for _ in range(self.num_solutions)]

    def update_population_trained_weights(self, population_trained_weights):
        """
        Updates the `trained_weights` attribute of the layers of each CNN according to the weights passed in the `population_trained_weights` parameter.

        population_trained_weights: A list holding the trained weights of all networks as matrices. The element at index i is assigned to the solution at index i.
        """

        for position, network in enumerate(self.population_networks):
            # cnn.update_layers_trained_weights() updates all layers in the current solution (i.e. CNN).
            cnn.update_layers_trained_weights(model=network,
                                              final_weights=population_trained_weights[position])
def validate_network_parameters(num_neurons_input,
                                num_neurons_output,
                                num_neurons_hidden_layers,
                                output_activation,
                                hidden_activations,
                                num_solutions=None):
    """
    Validates the parameters used for building a network (or a population of networks) and prepares the list of the name(s) of the activation function(s) for the hidden layer(s).
    In case that the value passed to the 'hidden_activations' parameter is a string not a list, then a list is created by replicating the passed name a number of times equal to the number of hidden layers (i.e. the length of the 'num_neurons_hidden_layers' parameter).
    If an invalid parameter is found, an exception is raised.

    num_neurons_input: Number of neurons in the input layer. If it is an integer, it must be > 0.
    num_neurons_output: Number of neurons in the output layer. If it is an integer, it must be > 0.
    num_neurons_hidden_layers: A list or a tuple holding the number of neurons in the hidden layer(s).
    output_activation: The name of the activation function of the output layer.
    hidden_activations: The name(s) of the activation function(s) of the hidden layer(s). A string, a list, or a tuple.
    num_solutions: Number of solutions (i.e. networks) in the population which defaults to None. If not None, it must be at least 2.

    Returns a new list holding the name(s) of the activation function(s) for the hidden layer(s). The list is empty if there are no hidden layers.

    Raises ValueError for an invalid value and TypeError if 'num_neurons_hidden_layers' or 'hidden_activations' has an unexpected type.
    """

    # Local import so that the function does not depend on the imports at the top of the file.
    import numbers

    # Validating the number of solutions within the population.
    if num_solutions is not None and num_solutions < 2:
        raise ValueError("num_solutions: The number of solutions within the population must be at least 2. The current value is {num_solutions}.".format(num_solutions=num_solutions))

    # Validating the number of neurons in the input layer.
    # isinstance() is used because comparing the value with the int type using 'is' is never True.
    if isinstance(num_neurons_input, numbers.Integral) and num_neurons_input <= 0:
        raise ValueError("num_neurons_input: The number of neurons in the input layer must be > 0.")

    # Validating the number of neurons in the output layer.
    if isinstance(num_neurons_output, numbers.Integral) and num_neurons_output <= 0:
        raise ValueError("num_neurons_output: The number of neurons in the output layer must be > 0.")

    # Validating the type of the 'num_neurons_hidden_layers' parameter which is expected to be list or tuple.
    if not isinstance(num_neurons_hidden_layers, (list, tuple)):
        raise TypeError("num_neurons_hidden_layers: A list or a tuple is expected but {hidden_layers_neurons_type} found.".format(hidden_layers_neurons_type=type(num_neurons_hidden_layers)))

    # Frequently used error messages.
    unexpected_output_activation_value = "Output activation function: The activation function of the output layer is passed as a string not {activation_type}."
    unexpected_activation_value = "Activation function: The supported values for the activation function are {supported_activations} but an unexpected value is found:\n{activations}"
    unexpected_activation_type = "Activation Function: A list, tuple, or a string is expected but {activations_type} found."
    length_mismatch = "Hidden activation functions: When passing the activation function(s) as a list or a tuple, its length must match the length of the 'num_neurons_hidden_layers' parameter but a mismatch is found:\n{mismatched_lengths}"

    # A list of the names of the supported activation functions.
    supported_activations = ["sigmoid", "relu", "softmax", "None"]

    # Validating the output layer activation function.
    if not isinstance(output_activation, str):
        raise ValueError(unexpected_output_activation_value.format(activation_type=type(output_activation)))
    if output_activation not in supported_activations:
        raise ValueError(unexpected_activation_value.format(activations=output_activation, supported_activations=supported_activations))

    num_hidden_layers = len(num_neurons_hidden_layers)

    # Without hidden layers, the passed 'hidden_activations' value is discarded.
    if num_hidden_layers == 0:
        print("WARNING: There are no hidden layers however a value is assigned to the parameter 'hidden_activations'. It will be reset to [].")
        return []

    if isinstance(hidden_activations, (list, tuple)):
        num_activations = len(hidden_activations)
        if num_activations != num_hidden_layers:
            raise ValueError(length_mismatch.format(mismatched_lengths="{num_activations} != {num_layers}".format(num_layers=num_hidden_layers, num_activations=num_activations)))
        # A new list is returned so that the caller's sequence is never shared or modified.
        hidden_activations = list(hidden_activations)
    elif isinstance(hidden_activations, str):
        if hidden_activations not in supported_activations:
            raise ValueError(unexpected_activation_value.format(supported_activations=supported_activations, activations=hidden_activations))
        # The same activation function is used across all the hidden layers.
        hidden_activations = [hidden_activations] * num_hidden_layers
    else:
        raise TypeError(unexpected_activation_type.format(activations_type=type(hidden_activations)))

    # Making sure the listed name(s) of the activation function(s) are supported.
    for act in hidden_activations:
        if act not in supported_activations:
            raise ValueError(unexpected_activation_value.format(supported_activations=supported_activations, activations=act))

    return hidden_activations
def create_network(num_neurons_input,
                   num_neurons_output,
                   num_neurons_hidden_layers=None,
                   output_activation="softmax",
                   hidden_activations="relu",
                   parameters_validated=False):
    """
    Creates a neural network as a linked list between the input, hidden, and output layers where each layer references the layer before it using the previous_layer argument. The input layer does not reference any layer.

    The sequences passed to this function are not modified.

    num_neurons_input: Number of neurons in the input layer.
    num_neurons_output: Number of neurons in the output layer.
    num_neurons_hidden_layers=None: A list or a tuple holding the number of neurons in the hidden layer(s). If None or empty, then no hidden layers are used. For example, num_neurons_hidden_layers=[10, 5] creates 2 hidden layers with 10 neurons for the first and 5 neurons for the second hidden layer.
    output_activation="softmax": The name of the activation function of the output layer which defaults to "softmax".
    hidden_activations="relu": The name(s) of the activation function(s) of the hidden layer(s). It defaults to "relu". If passed as a string, the specified activation function is used across all the hidden layers. If passed as a list or a tuple, there is a one-to-one mapping between its elements and the elements of num_neurons_hidden_layers.
    parameters_validated=False: If False, then the parameters are not validated yet and a call to the validate_network_parameters() function is made.

    Returns the reference to the last layer in the network architecture which is the output layer. Based on such reference, all network layers can be fetched.
    """

    if num_neurons_hidden_layers is None:
        num_neurons_hidden_layers = []

    if not parameters_validated:
        # Validating the passed parameters before creating the network.
        hidden_activations = validate_network_parameters(num_neurons_input=num_neurons_input,
                                                         num_neurons_output=num_neurons_output,
                                                         num_neurons_hidden_layers=num_neurons_hidden_layers,
                                                         output_activation=output_activation,
                                                         hidden_activations=hidden_activations)

    # Private copies are used so that the sequences of the caller are left untouched.
    hidden_sizes = list(num_neurons_hidden_layers)
    if isinstance(hidden_activations, str):
        # A single name is used across all the hidden layers.
        hidden_functions = [hidden_activations] * len(hidden_sizes)
    else:
        hidden_functions = list(hidden_activations)

    # Creating the input layer as an instance of the nn.InputLayer class.
    previous_layer = nn.InputLayer(num_neurons_input)

    # Each hidden layer is connected to the layer created before it.
    # Indexing raises IndexError if fewer activation functions than hidden layers are available.
    for layer_idx, num_neurons in enumerate(hidden_sizes):
        previous_layer = nn.DenseLayer(num_neurons=num_neurons,
                                       previous_layer=previous_layer,
                                       activation_function=hidden_functions[layer_idx])

    # The output layer is connected to the last hidden layer or, if there are no hidden layers, directly to the input layer.
    output_layer = nn.DenseLayer(num_neurons=num_neurons_output,
                                 previous_layer=previous_layer,
                                 activation_function=output_activation)

    return output_layer


def population_as_vectors(population_networks):
    """
    Accepts the population as networks and returns a list holding all weights of the layers of each solution (i.e. network) in the population as a vector.
    If the population has 6 solutions (i.e. networks), this function returns a list with 6 vectors, one for each network (i.e. solution).

    population_networks: A list holding references to the output (last) layers of the neural networks used in the population.

    Returns a list holding the weights vectors for all solutions (i.e. networks).
    """

    # nn.layers_weights_as_vector() returns the weights of all layers of a single network as one vector.
    return [nn.layers_weights_as_vector(solution) for solution in population_networks]
def population_as_matrices(population_networks, population_vectors):
    """
    Accepts the population as both networks and weights vectors and returns the weights of all layers of each solution (i.e. network) in the population as a matrix.
    If the population has 6 solutions (i.e. networks), this function returns a list with 6 matrices, one for each network holding its weights for all layers.

    population_networks: A list holding references to the output (last) layers of the neural networks used in the population.
    population_vectors: A list holding the weights of all networks as vectors. Such vectors are to be converted into matrices.

    Returns a list holding the weights matrices for all solutions (i.e. networks).
    """

    # Each network is paired with its own weights vector.
    return [nn.layers_weights_as_matrix(solution, solution_weights_vector)
            for solution, solution_weights_vector in zip(population_networks, population_vectors)]


class GANN:
    def create_population(self):
        """
        Creates the initial population of the genetic algorithm as a list of neural networks (i.e. solutions). Each element in the list holds the value returned by create_network() for one network. The method does not accept any parameter and it accesses all the required details from the `GANN` instance.

        The method returns the list holding the references to the networks.
        """

        population_networks = []
        for _ in range(self.num_solutions):
            # list() creates a new list for each network so the values saved in the instance are never modified.
            # Unlike .copy(), it also works if the values are tuples.
            network = create_network(num_neurons_input=self.num_neurons_input,
                                     num_neurons_output=self.num_neurons_output,
                                     num_neurons_hidden_layers=list(self.num_neurons_hidden_layers),
                                     output_activation=self.output_activation,
                                     hidden_activations=list(self.hidden_activations),
                                     parameters_validated=True)

            # Appending the created network to the list of population networks.
            population_networks.append(network)

        return population_networks

    def __init__(self,
                 num_solutions,
                 num_neurons_input,
                 num_neurons_output,
                 num_neurons_hidden_layers=None,
                 output_activation="softmax",
                 hidden_activations="relu"):
        """
        Creates an instance of the GANN class for training a neural network using the genetic algorithm.
        The passed parameters are validated using the validate_network_parameters() function and then the initial population is created using the create_population() method.

        num_solutions: Number of neural networks (i.e. solutions) in the population.
        num_neurons_input: Number of neurons in the input layer.
        num_neurons_output: Number of neurons in the output layer.
        num_neurons_hidden_layers=None: A list or a tuple holding the number of neurons in the hidden layer(s). If None or empty, then no hidden layers are used. For example, num_neurons_hidden_layers=[10, 5] creates 2 hidden layers with 10 neurons for the first and 5 neurons for the second hidden layer.
        output_activation="softmax": The name of the activation function of the output layer which defaults to "softmax".
        hidden_activations="relu": The name(s) of the activation function(s) of the hidden layer(s). It defaults to "relu". If passed as a string, the specified activation function is used across all the hidden layers. If passed as a list or a tuple, then it must have the same length as num_neurons_hidden_layers.
        """

        self.parameters_validated = False # If True, then the parameters passed to the GANN class constructor are valid.

        # A new list is created for each instance rather than sharing a default list between instances.
        if num_neurons_hidden_layers is None:
            num_neurons_hidden_layers = []

        # Validating the passed parameters before building the initial population.
        hidden_activations = validate_network_parameters(num_solutions=num_solutions,
                                                         num_neurons_input=num_neurons_input,
                                                         num_neurons_output=num_neurons_output,
                                                         num_neurons_hidden_layers=num_neurons_hidden_layers,
                                                         output_activation=output_activation,
                                                         hidden_activations=hidden_activations)

        self.num_solutions = num_solutions
        self.num_neurons_input = num_neurons_input
        self.num_neurons_output = num_neurons_output
        self.num_neurons_hidden_layers = num_neurons_hidden_layers
        self.output_activation = output_activation
        self.hidden_activations = hidden_activations
        self.parameters_validated = True

        # After the parameters are validated, the initial population is created.
        self.population_networks = self.create_population() # A list holding references to all the solutions (i.e. neural networks) used in the population.

    def update_population_trained_weights(self, population_trained_weights):
        """
        Updates the `trained_weights` attribute of the layers of each network according to the weights passed in the `population_trained_weights` parameter.

        population_trained_weights: A list holding the trained weights of all networks as matrices. The element at index i is assigned to the solution at index i.
        """

        for idx, solution in enumerate(self.population_networks):
            # nn.update_layers_trained_weights() updates all layers in the current solution (i.e. network).
            nn.update_layers_trained_weights(last_layer=solution,
                                             final_weights=population_trained_weights[idx])
def solve_duplicate_genes_randomly(self,
                                   solution,
                                   min_val,
                                   max_val,
                                   mutation_by_replacement,
                                   gene_type,
                                   num_trials=10):

    """
    Solves the duplicates in a solution by randomly selecting new values for the duplicating genes.

    solution: A solution with duplicate values.
    min_val: Minimum value of the range to sample a number randomly.
    max_val: Maximum value of the range to sample a number randomly.
    mutation_by_replacement: Identical to the self.mutation_by_replacement attribute. If False, the sampled float value is added to the current gene value instead of replacing it.
    gene_type: Exactly the same as the self.gene_type attribute. It is indexed as [type, precision] if self.gene_type_single is True, otherwise as one [type, precision] pair per gene.
    num_trials: Maximum number of trials to change the gene value to solve the duplicates.

    Returns:
        new_solution: Solution after trying to solve its duplicates. If no duplicates solved, then it is identical to the passed solution parameter.
        not_unique_indices: Indices of the genes with duplicate values.
        num_unsolved_duplicates: Number of unsolved duplicates.
    """

    new_solution = solution.copy()

    _, unique_gene_indices = numpy.unique(solution, return_index=True)
    not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)

    num_unsolved_duplicates = 0

    # A snapshot is iterated because not_unique_indices is recomputed after each gene is handled.
    for duplicate_index in tuple(not_unique_indices):
        # Data type and precision of the gene being fixed.
        if self.gene_type_single == True:
            gene_dtype, gene_precision = gene_type[0], gene_type[1]
        else:
            gene_dtype, gene_precision = gene_type[duplicate_index][0], gene_type[duplicate_index][1]

        # The type itself (not the [type, precision] pair) is compared against the supported int types.
        is_int_gene = gene_dtype in pygad.GA.supported_int_types

        for trial_index in range(num_trials):
            if is_int_gene:
                temp_val = self.unique_int_gene_from_range(solution=new_solution,
                                                           gene_index=duplicate_index,
                                                           min_val=min_val,
                                                           max_val=max_val,
                                                           mutation_by_replacement=mutation_by_replacement,
                                                           gene_type=gene_type)
            else:
                # [0] turns the size-1 array into a scalar before it is converted to the gene data type.
                temp_val = numpy.random.uniform(low=min_val,
                                                high=max_val,
                                                size=1)[0]
                if not mutation_by_replacement:
                    temp_val = new_solution[duplicate_index] + temp_val

            if gene_precision is not None:
                temp_val = numpy.round(gene_dtype(temp_val), gene_precision)
            else:
                temp_val = gene_dtype(temp_val)

            if temp_val not in new_solution:
                # A unique value is found. No more trials are needed for this gene.
                new_solution[duplicate_index] = temp_val
                break

            if trial_index == (num_trials - 1):
                # All trials are consumed without finding a unique value.
                num_unsolved_duplicates = num_unsolved_duplicates + 1
                if not self.suppress_warnings: warnings.warn("Failed to find a unique value for gene with index {gene_idx} whose value is {gene_value}. Consider adding more values in the gene space or use a wider range for initial population or random mutation.".format(gene_idx=duplicate_index, gene_value=solution[duplicate_index]))

        # Update the list of duplicate indices after each iteration.
        _, unique_gene_indices = numpy.unique(new_solution, return_index=True)
        not_unique_indices = set(range(len(solution))) - set(unique_gene_indices)

    return new_solution, not_unique_indices, num_unsolved_duplicates
+ if len(not_unique_indices) > 0: + new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution, + gene_type=gene_type, + not_unique_indices=not_unique_indices, + num_trials=10, + build_initial_pop=build_initial_pop) + else: + return new_solution, not_unique_indices, len(not_unique_indices) + + # Do another try if there exist duplicate genes. + # If there are no possible values for the gene 3 with index 3 to solve the duplicate, try to change the value of the other gene with index 2. + if len(not_unique_indices) > 0: + not_unique_indices = set(numpy.where(new_solution == new_solution[list(not_unique_indices)[0]])[0]) - set([list(not_unique_indices)[0]]) + new_solution, not_unique_indices, num_unsolved_duplicates = self.unique_genes_by_space(new_solution=new_solution, + gene_type=gene_type, + not_unique_indices=not_unique_indices, + num_trials=10, + build_initial_pop=build_initial_pop) + else: + # If there exist duplicate genes, then changing either of the 2 duplicating genes (with indices 2 and 3) will not solve the problem. + # This problem can be solved by randomly changing one of the non-duplicating genes that may make a room for a unique value in one the 2 duplicating genes. + # For example, if gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]] and the solution is [3 2 0 0], then the values of the last 2 genes duplicate. + # There are no possible changes in the last 2 genes to solve the problem. But it could be solved by changing the second gene from 2 to 4. + # As a result, any of the last 2 genes can take the value 2 and solve the duplicates. + return new_solution, not_unique_indices, len(not_unique_indices) + + return new_solution, not_unique_indices, num_unsolved_duplicates + + def unique_int_gene_from_range(self, + solution, + gene_index, + min_val, + max_val, + mutation_by_replacement, + gene_type, + step=None): + + """ + Finds a unique integer value for the gene. 
+ + solution: A solution with duplicate values. + gene_index: Index of the gene to find a unique value. + min_val: Minimum value of the range to sample a number randomly. + max_val: Maximum value of the range to sample a number randomly. + mutation_by_replacement: Identical to the self.mutation_by_replacement attribute. + gene_type: Exactly the same as the self.gene_type attribute. + + Returns: + selected_value: The new value of the gene. It may be identical to the original gene value in case there are no possible unique values for the gene. + """ + + if self.gene_type_single == True: + if step is None: + all_gene_values = numpy.arange(min_val, max_val, dtype=gene_type[0]) + else: + # For non-integer steps, the numpy.arange() function returns zeros if the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0]) + # To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function. 
+ all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), dtype=gene_type[0]) + else: + if step is None: + all_gene_values = numpy.arange(min_val, max_val, dtype=gene_type[gene_index][0]) + else: + all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), dtype=gene_type[gene_index][0]) + + if mutation_by_replacement: + pass + else: + all_gene_values = all_gene_values + solution[gene_index] + + if self.gene_type_single == True: + if not gene_type[1] is None: + all_gene_values = numpy.round(gene_type[0](all_gene_values), + gene_type[1]) + else: + if type(all_gene_values) is numpy.ndarray: + all_gene_values = numpy.asarray(all_gene_values, dtype=gene_type[0]) + else: + all_gene_values = gene_type[0](all_gene_values) + else: + if not gene_type[gene_index][1] is None: + all_gene_values = numpy.round(gene_type[gene_index][0](all_gene_values), + gene_type[gene_index][1]) + else: + all_gene_values = gene_type[gene_index][0](all_gene_values) + + values_to_select_from = list(set(all_gene_values) - set(solution)) + + if len(values_to_select_from) == 0: + if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but there is no enough values to prevent duplicates.") + selected_value = solution[gene_index] + else: + selected_value = random.choice(values_to_select_from) + + #if self.gene_type_single == True: + # selected_value = gene_type[0](selected_value) + #else: + # selected_value = gene_type[gene_index][0](selected_value) + + return selected_value + + def unique_genes_by_space(self, + new_solution, + gene_type, + not_unique_indices, + num_trials=10, + build_initial_pop=False): + + """ + Loops through all the duplicating genes to find unique values that from their gene spaces to solve the duplicates. + For each duplicating gene, a call to the unique_gene_by_space() function is made. + + new_solution: A solution with duplicate values. + gene_type: Exactly the same as the self.gene_type attribute. 
+ not_unique_indices: Indices with duplicating values. + num_trials: Maximum number of trials to change the gene value to solve the duplicates. + + Returns: + new_solution: Solution after trying to solve all of its duplicates. If no duplicates solved, then it is identical to the passed solution parameter. + not_unique_indices: Indices of the genes with duplicate values. + num_unsolved_duplicates: Number of unsolved duplicates. + """ + + num_unsolved_duplicates = 0 + for duplicate_index in not_unique_indices: + for trial_index in range(num_trials): + temp_val = self.unique_gene_by_space(solution=new_solution, + gene_idx=duplicate_index, + gene_type=gene_type, + build_initial_pop=build_initial_pop) + + if temp_val in new_solution and trial_index == (num_trials - 1): + # self.logger.info("temp_val, duplicate_index", temp_val, duplicate_index, new_solution) + num_unsolved_duplicates = num_unsolved_duplicates + 1 + if not self.suppress_warnings: warnings.warn("Failed to find a unique value for gene with index {gene_idx} whose value is {gene_value}. Consider adding more values in the gene space or use a wider range for initial population or random mutation.".format(gene_idx=duplicate_index, gene_value=new_solution[duplicate_index])) + elif temp_val in new_solution: + continue + else: + new_solution[duplicate_index] = temp_val + # self.logger.info("SOLVED", duplicate_index) + break + + # Update the list of duplicate indices after each iteration. + _, unique_gene_indices = numpy.unique(new_solution, return_index=True) + not_unique_indices = set(range(len(new_solution))) - set(unique_gene_indices) + # self.logger.info("not_unique_indices INSIDE", not_unique_indices) + + return new_solution, not_unique_indices, num_unsolved_duplicates + + def unique_gene_by_space(self, + solution, + gene_idx, + gene_type, + build_initial_pop=False): + + """ + Returns a unique gene value for a single gene based on its value space to solve the duplicates. 
+ + solution: A solution with duplicate values. + gene_idx: The index of the gene that duplicates its value with another gene. + gene_type: Exactly the same as the self.gene_type attribute. + + Returns: + A unique value, if exists, for the gene. + """ + + if self.gene_space_nested: + # Returning the current gene space from the 'gene_space' attribute. + if type(self.gene_space[gene_idx]) in [numpy.ndarray, list]: + curr_gene_space = self.gene_space[gene_idx].copy() + else: + curr_gene_space = self.gene_space[gene_idx] + + # If the gene space has only a single value, use it as the new gene value. + if type(curr_gene_space) in pygad.GA.supported_int_float_types: + value_from_space = curr_gene_space + # If the gene space is None, apply mutation by adding a random value between the range defined by the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'. + elif curr_gene_space is None: + if self.gene_type_single == True: + if gene_type[0] in pygad.GA.supported_int_types: + if build_initial_pop == True: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + # min_val=self.random_mutation_min_val, + # max_val=self.random_mutation_max_val, + min_val=self.init_range_low, + max_val=self.init_range_high, + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if build_initial_pop == True: + value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, + # high=self.random_mutation_max_val, + low=self.init_range_low, + high=self.init_range_high, + size=1) + else: + value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = 
solution[gene_idx] + value_from_space + else: + if gene_type[gene_idx] in pygad.GA.supported_int_types: + if build_initial_pop == True: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + # min_val=self.random_mutation_min_val, + # max_val=self.random_mutation_max_val, + min_val=self.init_range_low, + max_val=self.init_range_high, + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if build_initial_pop == True: + value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, + # high=self.random_mutation_max_val, + low=self.init_range_low, + high=self.init_range_high, + size=1) + else: + value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = solution[gene_idx] + value_from_space + + elif type(curr_gene_space) is dict: + if self.gene_type_single == True: + if gene_type[0] in pygad.GA.supported_int_types: + if build_initial_pop == True: + if 'step' in curr_gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=curr_gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in curr_gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], 
+ max_val=curr_gene_space['high'], + step=curr_gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in curr_gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], + stop=curr_gene_space['high'], + step=curr_gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=curr_gene_space['low'], + high=curr_gene_space['high'], + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = solution[gene_idx] + value_from_space + else: + if gene_type[gene_idx] in pygad.GA.supported_int_types: + if build_initial_pop == True: + if 'step' in curr_gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=curr_gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in curr_gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=curr_gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in 
curr_gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], + stop=curr_gene_space['high'], + step=curr_gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=curr_gene_space['low'], + high=curr_gene_space['high'], + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = solution[gene_idx] + value_from_space + + else: + # Selecting a value randomly based on the current gene's space in the 'gene_space' attribute. + # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. + if len(curr_gene_space) == 1: + value_from_space = curr_gene_space[0] + if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but the space of the gene with index {gene_idx} has only a single value. Thus, duplicates are possible.".format(gene_idx=gene_idx)) + # If the gene space has more than 1 value, then select a new one that is different from the current value. + else: + values_to_select_from = list(set(curr_gene_space) - set(solution)) + + if len(values_to_select_from) == 0: + if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but the gene space does not have enough values to prevent duplicates.") + value_from_space = solution[gene_idx] + else: + value_from_space = random.choice(values_to_select_from) + else: + # Selecting a value randomly from the global gene space in the 'gene_space' attribute. 
+ if type(self.gene_space) is dict: + if self.gene_type_single == True: + if gene_type[0] in pygad.GA.supported_int_types: + if build_initial_pop == True: + if 'step' in self.gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=self.gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in self.gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=self.gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + # When the gene_space is assigned a dict object, then it specifies the lower and upper limits of all genes in the space. 
+ if 'step' in self.gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=self.gene_space['low'], + high=self.gene_space['high'], + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = solution[gene_idx] + value_from_space + else: + if gene_type[gene_idx] in pygad.GA.supported_int_types: + if build_initial_pop == True: + if 'step' in self.gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=self.gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + if 'step' in self.gene_space.keys(): + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=self.gene_space['step'], + mutation_by_replacement=True, + gene_type=gene_type) + else: + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=None, + mutation_by_replacement=True, + gene_type=gene_type) + else: + # When the gene_space is assigned a dict object, then it specifies the lower and upper limits of all genes in the space. 
+ if 'step' in self.gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=self.gene_space['low'], + high=self.gene_space['high'], + size=1) + if self.mutation_by_replacement: + pass + else: + value_from_space = solution[gene_idx] + value_from_space + + else: + # If the space type is not of type dict, then a value is randomly selected from the gene_space attribute. + # Remove all the genes in the current solution from the gene_space. + # This only leaves the unique values that could be selected for the gene. + values_to_select_from = list(set(self.gene_space) - set(solution)) + + if len(values_to_select_from) == 0: + if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but the gene space does not have enough values to prevent duplicates.") + value_from_space = solution[gene_idx] + else: + value_from_space = random.choice(values_to_select_from) + + if value_from_space is None: + if build_initial_pop == True: + value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, + # high=self.random_mutation_max_val, + low=self.init_range_low, + high=self.init_range_high, + size=1) + else: + value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + + if self.gene_type_single == True: + if not gene_type[1] is None: + value_from_space = numpy.round(gene_type[0](value_from_space), + gene_type[1]) + else: + value_from_space = gene_type[0](value_from_space) + else: + if not gene_type[gene_idx][1] is None: + value_from_space = numpy.round(gene_type[gene_idx][0](value_from_space), + gene_type[gene_idx][1]) + else: + value_from_space = gene_type[gene_idx][0](value_from_space) + + return value_from_space diff --git a/pygad/kerasga/__init__.py b/pygad/kerasga/__init__.py new file mode 100644 index 
0000000..7e6b139 --- /dev/null +++ b/pygad/kerasga/__init__.py @@ -0,0 +1,3 @@ +from .kerasga import * + +__version__ = "1.2.0" diff --git a/pygad/kerasga/kerasga.py b/pygad/kerasga/kerasga.py new file mode 100644 index 0000000..0e1b618 --- /dev/null +++ b/pygad/kerasga/kerasga.py @@ -0,0 +1,89 @@ +import copy +import numpy +import tensorflow.keras + +def model_weights_as_vector(model): + weights_vector = [] + + for layer in model.layers: # model.get_weights(): + if layer.trainable: + layer_weights = layer.get_weights() + for l_weights in layer_weights: + vector = numpy.reshape(l_weights, newshape=(l_weights.size)) + weights_vector.extend(vector) + + return numpy.array(weights_vector) + +def model_weights_as_matrix(model, weights_vector): + weights_matrix = [] + + start = 0 + for layer_idx, layer in enumerate(model.layers): # model.get_weights(): + # for w_matrix in model.get_weights(): + layer_weights = layer.get_weights() + if layer.trainable: + for l_weights in layer_weights: + layer_weights_shape = l_weights.shape + layer_weights_size = l_weights.size + + layer_weights_vector = weights_vector[start:start + layer_weights_size] + layer_weights_matrix = numpy.reshape(layer_weights_vector, newshape=(layer_weights_shape)) + weights_matrix.append(layer_weights_matrix) + + start = start + layer_weights_size + else: + for l_weights in layer_weights: + weights_matrix.append(l_weights) + + return weights_matrix + +def predict(model, solution, data): + # Fetch the parameters of the best solution. + solution_weights = model_weights_as_matrix(model=model, + weights_vector=solution) + _model = tensorflow.keras.models.clone_model(model) + _model.set_weights(solution_weights) + predictions = _model.predict(data) + + return predictions + +class KerasGA: + + def __init__(self, model, num_solutions): + + """ + Creates an instance of the KerasGA class to build a population of model parameters. + + model: A Keras model class. + num_solutions: Number of solutions in the population. 
Each solution has different model parameters. + """ + + self.model = model + + self.num_solutions = num_solutions + + # A list holding references to all the solutions (i.e. networks) used in the population. + self.population_weights = self.create_population() + + def create_population(self): + + """ + Creates the initial population of the genetic algorithm as a list of networks' weights (i.e. solutions). Each element in the list holds a different weights of the Keras model. + + The method returns a list holding the weights of all solutions. + """ + + model_weights_vector = model_weights_as_vector(model=self.model) + + net_population_weights = [] + net_population_weights.append(model_weights_vector) + + for idx in range(self.num_solutions-1): + + net_weights = copy.deepcopy(model_weights_vector) + net_weights = numpy.array(net_weights) + numpy.random.uniform(low=-1.0, high=1.0, size=model_weights_vector.size) + + # Appending the weights to the population. + net_population_weights.append(net_weights) + + return net_population_weights diff --git a/pygad/nn/__init__.py b/pygad/nn/__init__.py new file mode 100644 index 0000000..224d984 --- /dev/null +++ b/pygad/nn/__init__.py @@ -0,0 +1,4 @@ +from .nn import * + +__version__ = "1.2.1" + diff --git a/pygad/nn/nn.py b/pygad/nn/nn.py new file mode 100644 index 0000000..04933d7 --- /dev/null +++ b/pygad/nn/nn.py @@ -0,0 +1,399 @@ +import numpy +import functools + +""" +This project creates a neural network where the architecture has input and dense layers only. More layers will be added in the future. +The project only implements the forward pass of a neural network and no training algorithm is used. +For training a neural network using the genetic algorithm, check this project (https://github.com/ahmedfgad/NeuralGenetic) in which the genetic algorithm is used for training the network. 
+Feel free to leave an issue in this project (https://github.com/ahmedfgad/NumPyANN) in case something is not working properly or to ask for questions. I am also available for e-mails at ahmed.f.gad@gmail.com +""" + +def layers_weights(last_layer, initial=True): + """ + Creates a list holding the weights of all layers in the neural network. + + last_layer: A reference to the last (output) layer in the network architecture. + initial: When True, the function returns the initial weights of the layers. When False, the trained weights of the layers are returned. The initial weights are only needed before network training starts. The trained weights are needed to predict the network outputs. + + Returns a list (network_weights) holding the weights of the layers. + """ + network_weights = [] + + layer = last_layer + while "previous_layer" in layer.__init__.__code__.co_varnames: + # If the 'initial' parameter is True, append the initial weights. Otherwise, append the trained weights. + if initial == True: + network_weights.append(layer.initial_weights) + elif initial == False: + network_weights.append(layer.trained_weights) + else: + raise ValueError("Unexpected value to the 'initial' parameter: {initial}.".format(initial=initial)) + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the InputLayer class), raise an error. + if not (type(layer) is InputLayer): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. + # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). 
+ network_weights.reverse() + return network_weights + +def layers_weights_as_vector(last_layer, initial=True): + """ + Creates a list holding the weights of each layer in the network as a vector. + + last_layer: A reference to the last (output) layer in the network architecture. + initial: When True, the function returns the initial weights of the layers. When False, the trained weights of the layers are returned. The initial weights are only needed before network training starts. The trained weights are needed to predict the network outputs. + + Returns a list (network_weights) holding the weights of the layers as a vector. + """ + network_weights = [] + + layer = last_layer + while "previous_layer" in layer.__init__.__code__.co_varnames: + # If the 'initial' parameter is True, append the initial weights. Otherwise, append the trained weights. + if initial == True: + vector = numpy.reshape(layer.initial_weights, newshape=(layer.initial_weights.size)) +# vector = DenseLayer.to_vector(matrix=layer.initial_weights) + network_weights.extend(vector) + elif initial == False: + vector = numpy.reshape(layer.trained_weights, newshape=(layer.trained_weights.size)) +# vector = DenseLayer.to_vector(array=layer.trained_weights) + network_weights.extend(vector) + else: + raise ValueError("Unexpected value to the 'initial' parameter: {initial}.".format(initial=initial)) + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the InputLayer class), raise an error. + if not (type(layer) is InputLayer): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. 
+ # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). + network_weights.reverse() + return numpy.array(network_weights) + +def layers_weights_as_matrix(last_layer, vector_weights): + """ + Converts the network weights from vectors to matrices. + + last_layer: A reference to the last (output) layer in the network architecture. + vector_weights: The network weights as vectors where the weights of each layer form a single vector. + + Returns a list (network_weights) holding the weights of the layers as matrices. + """ + network_weights = [] + + start = 0 + layer = last_layer + vector_weights = vector_weights[::-1] + while "previous_layer" in layer.__init__.__code__.co_varnames: + layer_weights_shape = layer.initial_weights.shape + layer_weights_size = layer.initial_weights.size + + weights_vector=vector_weights[start:start + layer_weights_size] +# matrix = DenseLayer.to_array(vector=weights_vector, shape=layer_weights_shape) + matrix = numpy.reshape(weights_vector, newshape=(layer_weights_shape)) + network_weights.append(matrix) + + start = start + layer_weights_size + + # Go to the previous layer. + layer = layer.previous_layer + + # If the first layer in the network is not an input layer (i.e. an instance of the InputLayer class), raise an error. + if not (type(layer) is InputLayer): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the weights of the layers are in the reverse order. In other words, the weights of the first layer are at the last index of the 'network_weights' list while the weights of the last layer are at the first index. + # Reversing the 'network_weights' list to order the layers' weights according to their location in the network architecture (i.e. the weights of the first layer appears at index 0 of the list). 
+ network_weights.reverse() + return network_weights + +def layers_activations(last_layer): + """ + Creates a list holding the activation functions of all layers in the network. + + last_layer: A reference to the last (output) layer in the network architecture. + + Returns a list (activations) holding the activation functions of the layers. + """ + activations = [] + + layer = last_layer + while "previous_layer" in layer.__init__.__code__.co_varnames: + activations.append(layer.activation_function) + + # Go to the previous layer. + layer = layer.previous_layer + + if not (type(layer) is InputLayer): + raise TypeError("The first layer in the network architecture must be an input layer.") + + # Currently, the activations of layers are in the reverse order. In other words, the activation function of the first layer is at the last index of the 'activations' list while the activation function of the last layer is at the first index. + # Reversing the 'activations' list to order the layers' activation functions according to their location in the network architecture (i.e. the activation function of the first layer appears at index 0 of the list). + activations.reverse() + return activations + +def sigmoid(sop): + + """ + Applies the sigmoid function. + + sop: The input to which the sigmoid function is applied. + + Returns the result of the sigmoid function. + """ + + if type(sop) in [list, tuple]: + sop = numpy.array(sop) + + return 1.0 / (1 + numpy.exp(-1 * sop)) + +def relu(sop): + + """ + Applies the rectified linear unit (ReLU) function. + + sop: The input to which the relu function is applied. + + Returns the result of the ReLU function. + """ + + if not (type(sop) in [list, tuple, numpy.ndarray]): + if sop < 0: + return 0 + else: + return sop + elif type(sop) in [list, tuple]: + sop = numpy.array(sop) + + result = sop + result[sop < 0] = 0 + + return result + +def softmax(layer_outputs): + + """ + Applies the softmax function. 
+ + layer_outputs: The input to which the softmax function is applied. + + Returns the result of the softmax function. + """ + return layer_outputs / (numpy.sum(layer_outputs) + 0.000001) + +def train(num_epochs, + last_layer, + data_inputs, + data_outputs, + problem_type="classification", + learning_rate=0.01): + """ + Trains the neural network. + + num_epochs: Number of epochs. + last_layer: Reference to the last (output) layer in the network architecture. + data_inputs: Data features. + data_outputs: Data outputs. + problem_type: Can be either classification or regression to define the problem type. + learning_rate: Learning rate which defaults to 0.01. + """ + + if not (problem_type in ["classification", "regression"]): + raise ValueError("The value of the problem_type parameter can be either classification or regression but {problem_type_val} found.".format(problem_type_val=problem_type)) + + # To fetch the initial weights of the layer, the 'initial' argument is set to True. + weights = layers_weights(last_layer, initial=True) + activations = layers_activations(last_layer) + + network_error = 0 + for epoch in range(num_epochs): + print("Epoch ", epoch) + for sample_idx in range(data_inputs.shape[0]): + r1 = data_inputs[sample_idx, :] + for idx in range(len(weights) - 1): + curr_weights = weights[idx] + r1 = numpy.matmul(r1, curr_weights) + if activations[idx] == "relu": + r1 = relu(r1) + elif activations[idx] == "sigmoid": + r1 = sigmoid(r1) + elif activations[idx] == "softmax": + r1 = softmax(r1) + elif activations[idx] == None: + pass + + curr_weights = weights[-1] + r1 = numpy.matmul(r1, curr_weights) + + if problem_type == "classification": + prediction = numpy.where(r1 == numpy.max(r1))[0][0] + else: + prediction = r1 + + network_error = network_error + numpy.mean(numpy.abs((prediction - data_outputs[sample_idx]))) + + # Updating the network weights once after completing an epoch (i.e. passing through all the samples). 
+ weights = update_weights(weights=weights, + network_error=network_error, + learning_rate=learning_rate) + + # Initially, the 'trained_weights' attribute of the layers are set to None. After the is trained, the 'trained_weights' attribute is updated by the trained weights using the update_layers_trained_weights() function. + update_layers_trained_weights(last_layer, weights) + +def update_weights(weights, network_error, learning_rate): + """ + Updates the network weights using the learning rate only. + The purpose of this project is to only apply the forward pass of training a neural network. Thus, there is no optimization algorithm is used like the gradient descent. + For optimizing the neural network, check this project (https://github.com/ahmedfgad/NeuralGenetic) in which the genetic algorithm is used for training the network. + + weights: The current weights of the network. + network_error: The network error. + learning_rate: The learning rate. + + It returns the new weights. + """ + # weights = numpy.array(weights) + for layer_idx in range(len(weights)): + weights[layer_idx] = network_error * learning_rate * weights[layer_idx] + + return weights + +def update_layers_trained_weights(last_layer, final_weights): + """ + After the network weights are trained, the 'trained_weights' attribute of each layer is updated by the weights calculated after passing all the epochs (such weights are passed in the 'final_weights' parameter). + By just passing a reference to the last layer in the network (i.e. output layer) in addition to the final weights, this function updates the 'trained_weights' attribute of all layers. + + last_layer: A reference to the last (output) layer in the network architecture. + final_weights: An array of weights of all layers in the network after passing through all the epochs. 
+ """ + layer = last_layer + layer_idx = len(final_weights) - 1 + while "previous_layer" in layer.__init__.__code__.co_varnames: + layer.trained_weights = final_weights[layer_idx] + + layer_idx = layer_idx - 1 + # Go to the previous layer. + layer = layer.previous_layer + +def predict(last_layer, data_inputs, problem_type="classification"): + """ + Uses the trained weights for predicting the samples' outputs. + + last_layer: A reference to the last (output) layer in the network architecture. + data_inputs: Data features. + problem_type: Can be either classification or regression to define the problem type. + + Returns the predictions of all samples. + """ + if not (problem_type in ["classification", "regression"]): + raise ValueError("The value of the problem_type parameter can be either classification or regression but {problem_type_val} found.".format(problem_type_val=problem_type)) + + # To fetch the trained weights of the layer, the 'initial' argument is set to False. + weights = layers_weights(last_layer, initial=False) + activations = layers_activations(last_layer) + + if len(weights) != len(activations): + raise TypeError("The length of layers {num_layers} is not equal to the number of activations functions {num_activations} and they must be equal.".format(num_layers=len(weights), num_activations=len(activations))) + + predictions = [] + for sample_idx in range(data_inputs.shape[0]): + r1 = data_inputs[sample_idx, :] + for curr_weights, activation in zip(weights, activations): + r1 = numpy.matmul(r1, curr_weights) + if activation == "relu": + r1 = relu(r1) + elif activation == "sigmoid": + r1 = sigmoid(r1) + elif activation == "softmax": + r1 = softmax(r1) + elif activation == None: + pass + + if problem_type == "classification": + prediction = numpy.where(r1 == numpy.max(r1))[0][0] + else: + prediction = r1 + + predictions.append(prediction) + + return predictions + +def to_vector(array): + """ + Converts a passed NumPy array (of any dimensionality) to its 
`array` parameter into a 1D vector and returns the vector. + + array: The NumPy array to be converted into a 1D vector. + + Returns the array after being reshaped into a NumPy 1D vector. + + Example: weights_vector = nn.DenseLayer.to_vector(array=array) + """ + if not (type(array) is numpy.ndarray): + raise TypeError("An input of type numpy.ndarray is expected but an input of type {in_type} found.".format(in_type=type(array))) + return numpy.reshape(array, newshape=(array.size)) + +def to_array(vector, shape): + """ + Converts a passed vector to its `vector` parameter into a NumPy array and returns the array. + + vector: The 1D vector to be converted into an array. + shape: The target shape of the array. + + Returns the NumPy 1D vector after being reshaped into an array. + + Example: weights_matrix = nn.DenseLayer.to_array(vector=vector, shape=shape) + """ + if not (type(vector) is numpy.ndarray): + raise TypeError("An input of type numpy.ndarray is expected but an input of type {in_type} found.".format(in_type=type(vector))) + if vector.ndim > 1: + raise ValueError("A 1D NumPy array is expected but an array of {ndim} dimensions found.".format(ndim=vector.ndim)) + if vector.size != functools.reduce(lambda x,y:x*y, shape, 1): # (operator.mul == lambda x,y:x*y + raise ValueError("Mismatch between the vector length and the array shape. A vector of length {vector_length} cannot be converted into a array of shape ({array_shape}).".format(vector_length=vector.size, array_shape=shape)) + return numpy.reshape(vector, newshape=shape) + +class InputLayer: + """ + Implementing the input layer of a neural network. + """ + def __init__(self, num_inputs): + if num_inputs <= 0: + raise ValueError("Number of input neurons cannot be <= 0. Please pass a valid value to the 'num_inputs' parameter.") + # The number of neurons in the input layer. + self.num_neurons = num_inputs + +class DenseLayer: + """ + Implementing the input dense (fully connected) layer of a neural network. 
+ """ + def __init__(self, num_neurons, previous_layer, activation_function="sigmoid"): + if num_neurons <= 0: + raise ValueError("Number of neurons cannot be <= 0. Please pass a valid value to the 'num_neurons' parameter.") + # Number of neurons in the dense layer. + self.num_neurons = num_neurons + + supported_activation_functions = ("sigmoid", "relu", "softmax", "None") + if not (activation_function in supported_activation_functions): + raise ValueError("The specified activation function '{activation_function}' is not among the supported activation functions {supported_activation_functions}. Please use one of the supported functions.".format(activation_function=activation_function, supported_activation_functions=supported_activation_functions)) + self.activation_function = activation_function + + if previous_layer is None: + raise TypeError("The previous layer cannot be of Type 'None'. Please pass a valid layer to the 'previous_layer' parameter.") + # A reference to the layer that preceeds the current layer in the network architecture. + self.previous_layer = previous_layer + + # Initializing the weights of the layer. + self.initial_weights = numpy.random.uniform(low=-0.1, + high=0.1, + size=(previous_layer.num_neurons, num_neurons)) + + # The trained weights of the layer. Only assigned a value after the network is trained (i.e. the train() function completes). 
+ # Just initialized to be equal to the initial weights + self.trained_weights = self.initial_weights.copy() \ No newline at end of file diff --git a/pygad/pygad.py b/pygad/pygad.py new file mode 100644 index 0000000..9916a15 --- /dev/null +++ b/pygad/pygad.py @@ -0,0 +1,2219 @@ +import numpy +import random +import cloudpickle +import time +import warnings +import concurrent.futures +import inspect +import logging +from pygad import utils +from pygad import helper +from pygad import visualize + +class GA(utils.parent_selection.ParentSelection, + utils.crossover.Crossover, + utils.mutation.Mutation, + helper.unique.Unique, + visualize.plot.Plot): + + supported_int_types = [int, numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.uint, numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64] + supported_float_types = [float, numpy.float16, numpy.float32, numpy.float64] + supported_int_float_types = supported_int_types + supported_float_types + + def __init__(self, + num_generations, + num_parents_mating, + fitness_func, + fitness_batch_size=None, + initial_population=None, + sol_per_pop=None, + num_genes=None, + init_range_low=-4, + init_range_high=4, + gene_type=float, + parent_selection_type="sss", + keep_parents=-1, + keep_elitism=1, + K_tournament=3, + crossover_type="single_point", + crossover_probability=None, + mutation_type="random", + mutation_probability=None, + mutation_by_replacement=False, + mutation_percent_genes='default', + mutation_num_genes=None, + random_mutation_min_val=-1.0, + random_mutation_max_val=1.0, + gene_space=None, + allow_duplicate_genes=True, + on_start=None, + on_fitness=None, + on_parents=None, + on_crossover=None, + on_mutation=None, + on_generation=None, + on_stop=None, + delay_after_gen=0.0, + save_best_solutions=False, + save_solutions=False, + suppress_warnings=False, + stop_criteria=None, + parallel_processing=None, + random_seed=None, + logger=None): + + """ + The constructor of the GA class accepts all parameters required 
to create an instance of the GA class. It validates such parameters. + + num_generations: Number of generations. + num_parents_mating: Number of solutions to be selected as parents in the mating pool. + + fitness_func: Accepts a function/method and returns the fitness value of the solution. In PyGAD 2.20.0, a third parameter is passed referring to the 'pygad.GA' instance. If method, then it must accept 4 parameters where the fourth one refers to the method's object. + fitness_batch_size: Added in PyGAD 2.19.0. Supports calculating the fitness in batches. If the value is 1 or None, then the fitness function is called for each invidiaul solution. If given another value X where X is neither 1 nor None (e.g. X=3), then the fitness function is called once for each X (3) solutions. + + initial_population: A user-defined initial population. It is useful when the user wants to start the generations with a custom initial population. It defaults to None which means no initial population is specified by the user. In this case, PyGAD creates an initial population using the 'sol_per_pop' and 'num_genes' parameters. An exception is raised if the 'initial_population' is None while any of the 2 parameters ('sol_per_pop' or 'num_genes') is also None. + sol_per_pop: Number of solutions in the population. + num_genes: Number of parameters in the function. + + init_range_low: The lower value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20 and higher. + init_range_high: The upper value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20. + # It is OK to set the value of any of the 2 parameters ('init_range_low' and 'init_range_high') to be equal, higher or lower than the other parameter (i.e. init_range_low is not needed to be lower than init_range_high). + + gene_type: The type of the gene. 
It is assigned to any of these types (int, float, numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.uint, numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64, numpy.float16, numpy.float32, numpy.float64) and forces all the genes to be of that type. + + parent_selection_type: Type of parent selection. + keep_parents: If 0, this means no parent in the current population will be used in the next population. If -1, this means all parents in the current population will be used in the next population. If set to a value > 0, then the specified value refers to the number of parents in the current population to be used in the next population. Some parent selection operators such as rank selection, favor population diversity and therefore keeping the parents in the next generation can be beneficial. However, some other parent selection operators, such as roulette wheel selection (RWS), have higher selection pressure and keeping more than one parent in the next generation can seriously harm population diversity. This parameter have an effect only when the keep_elitism parameter is 0. Thanks to Prof. Fernando Jiménez Barrionuevo (http://webs.um.es/fernan) for editing this sentence. + K_tournament: When the value of 'parent_selection_type' is 'tournament', the 'K_tournament' parameter specifies the number of solutions from which a parent is selected randomly. + + keep_elitism: Added in PyGAD 2.18.0. It can take the value 0 or a positive integer that satisfies (0 <= keep_elitism <= sol_per_pop). It defaults to 1 which means only the best solution in the current generation is kept in the next generation. If assigned 0, this means it has no effect. If assigned a positive integer K, then the best K solutions are kept in the next generation. It cannot be assigned a value greater than the value assigned to the sol_per_pop parameter. If this parameter has a value different than 0, then the keep_parents parameter will have no effect. 
+ + crossover_type: Type of the crossover opreator. If crossover_type=None, then the crossover step is bypassed which means no crossover is applied and thus no offspring will be created in the next generations. The next generation will use the solutions in the current population. + crossover_probability: The probability of selecting a solution for the crossover operation. If the solution probability is <= crossover_probability, the solution is selected. The value must be between 0 and 1 inclusive. + + mutation_type: Type of the mutation opreator. If mutation_type=None, then the mutation step is bypassed which means no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation. + mutation_probability: The probability of selecting a gene for the mutation operation. If the gene probability is <= mutation_probability, the gene is selected. It accepts either a single value for fixed mutation or a list/tuple/numpy.ndarray of 2 values for adaptive mutation. The values must be between 0 and 1 inclusive. If specified, then no need for the 2 parameters mutation_percent_genes and mutation_num_genes. + + mutation_by_replacement: An optional bool parameter. It works only when the selected type of mutation is random (mutation_type="random"). In this case, setting mutation_by_replacement=True means replace the gene by the randomly generated value. If False, then it has no effect and random mutation works by adding the random value to the gene. + + mutation_percent_genes: Percentage of genes to mutate which defaults to the string 'default' which means 10%. This parameter has no action if any of the 2 parameters mutation_probability or mutation_num_genes exist. + mutation_num_genes: Number of genes to mutate which defaults to None. If the parameter mutation_num_genes exists, then no need for the parameter mutation_percent_genes. 
This parameter has no action if the mutation_probability parameter exists. + random_mutation_min_val: The minimum value of the range from which a random value is selected to be added to the selected gene(s) to mutate. It defaults to -1.0. + random_mutation_max_val: The maximum value of the range from which a random value is selected to be added to the selected gene(s) to mutate. It defaults to 1.0. + + gene_space: It accepts a list of all possible values of the gene. This list is used in the mutation step. Should be used only if the gene space is a set of discrete values. No need for the 2 parameters (random_mutation_min_val and random_mutation_max_val) if the parameter gene_space exists. Added in PyGAD 2.5.0. In PyGAD 2.11.0, the gene_space can be assigned a dict. + + on_start: Accepts a function/method to be called only once before the genetic algorithm starts its evolution. If function, then it must accept a single parameter representing the instance of the genetic algorithm. If method, then it must accept 2 parameters where the second one refers to the method's object. Added in PyGAD 2.6.0. + on_fitness: Accepts a function/method to be called after calculating the fitness values of all solutions in the population. If function, then it must accept 2 parameters: 1) a list of all solutions' fitness values 2) the instance of the genetic algorithm. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. + on_parents: Accepts a function/method to be called after selecting the parents that mates. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the selected parents. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. + on_crossover: Accepts a function/method to be called each time the crossover operation is applied. 
If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring generated using crossover. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. + on_mutation: Accepts a function/method to be called each time the mutation operation is applied. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring after applying the mutation. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. + on_generation: Accepts a function/method to be called after each generation. If function, then it must accept a single parameter representing the instance of the genetic algorithm. If the function returned "stop", then the run() method stops without completing the other generations. If method, then it must accept 2 parameters where the second one refers to the method's object. Added in PyGAD 2.6.0. + on_stop: Accepts a function/method to be called only once exactly before the genetic algorithm stops or when it completes all the generations. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one is a list of fitness values of the last population's solutions. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. + + delay_after_gen: Added in PyGAD 2.4.0. It accepts a non-negative number specifying the number of seconds to wait after a generation completes and before going to the next generation. It defaults to 0.0 which means no delay after the generation. + + save_best_solutions: Added in PyGAD 2.9.0 and its type is bool. If True, then the best solution in each generation is saved into the 'best_solutions' attribute. 
Use this parameter with caution as it may cause memory overflow when either the number of generations or the number of genes is large. + save_solutions: Added in PyGAD 2.15.0 and its type is bool. If True, then all solutions in each generation are saved into the 'solutions' attribute. Use this parameter with caution as it may cause memory overflow when either the number of generations, number of genes, or number of solutions in population is large. + + suppress_warnings: Added in PyGAD 2.10.0 and its type is bool. If True, then no warning messages will be displayed. It defaults to False. + + allow_duplicate_genes: Added in PyGAD 2.13.0. If True, then a solution/chromosome may have duplicate gene values. If False, then each gene will have a unique value in its solution. + + stop_criteria: Added in PyGAD 2.15.0. It is assigned to some criteria to stop the evolution if at least one criterion holds. + + parallel_processing: Added in PyGAD 2.17.0. Defaults to `None` which means no parallel processing is used. If a positive integer is assigned, it specifies the number of threads to be used. If a list or a tuple of exactly 2 elements is assigned, then: 1) The first element can be either "process" or "thread" to specify whether processes or threads are used, respectively. 2) The second element can be: 1) A positive integer to select the maximum number of processes or threads to be used. 2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'. 3) None to use the default value as calculated by the concurrent.futures module. + + random_seed: Added in PyGAD 2.18.0. It defines the random seed to be used by the random function generators (we use random functions in the NumPy and random modules). This helps to reproduce the same results by setting the same random seed. + + logger: Added in PyGAD 2.20.0. It accepts a logger object of the 'logging.Logger' class to log the messages. 
If no logger is passed, then a default logger is created to log/print the messages to the console exactly like using the 'print()' function. + """ + + # If no logger is passed, then create a logger that logs only the messages to the console. + if logger is None: + # Create a logger named with the module name. + logger = logging.getLogger(__name__) + # Set the logger log level to 'DEBUG' to log all kinds of messages. + logger.setLevel(logging.DEBUG) + + # Clear any attached handlers to the logger from the previous runs. + # If the handlers are not cleared, then the new handler will be appended to the list of handlers. + # This makes the single log message be repeated according to the length of the list of handlers. + logger.handlers.clear() + + # Create the handlers. + stream_handler = logging.StreamHandler() + # Set the handler log level to 'DEBUG' to log all kinds of messages received from the logger. + stream_handler.setLevel(logging.DEBUG) + + # Create the formatter that just includes the log message. + formatter = logging.Formatter('%(message)s') + + # Add the formatter to the handler. + stream_handler.setFormatter(formatter) + + # Add the handler to the logger. + logger.addHandler(stream_handler) + else: + # Validate that the passed logger is of type 'logging.Logger'. + if isinstance(logger, logging.Logger): + pass + else: + raise TypeError("The expected type of the 'logger' parameter is 'logging.Logger' but {logger_type} found.".format(logger_type=type(logger))) + + # Create the 'self.logger' attribute to hold the logger. + # Instead of using 'print()', use 'self.logger.info()' + self.logger = logger + + self.random_seed = random_seed + if random_seed is None: + pass + else: + numpy.random.seed(self.random_seed) + random.seed(self.random_seed) + + # If suppress_warnings is bool and its valud is False, then print warning messages. 
+ if type(suppress_warnings) is bool: + self.suppress_warnings = suppress_warnings + else: + self.valid_parameters = False + self.logger.error("The expected type of the 'suppress_warnings' parameter is bool but {suppress_warnings_type} found.".format(suppress_warnings_type=type(suppress_warnings))) + raise TypeError("The expected type of the 'suppress_warnings' parameter is bool but {suppress_warnings_type} found.".format(suppress_warnings_type=type(suppress_warnings))) + + # Validating mutation_by_replacement + if not (type(mutation_by_replacement) is bool): + self.valid_parameters = False + self.logger.error("The expected type of the 'mutation_by_replacement' parameter is bool but {mutation_by_replacement_type} found.".format(mutation_by_replacement_type=type(mutation_by_replacement))) + raise TypeError("The expected type of the 'mutation_by_replacement' parameter is bool but {mutation_by_replacement_type} found.".format(mutation_by_replacement_type=type(mutation_by_replacement))) + + self.mutation_by_replacement = mutation_by_replacement + + # Validate gene_space + self.gene_space_nested = False + if type(gene_space) is type(None): + pass + elif type(gene_space) in [list, tuple, range, numpy.ndarray]: + if len(gene_space) == 0: + self.valid_parameters = False + self.logger.error("'gene_space' cannot be empty (i.e. its length must be >= 0).") + raise ValueError("'gene_space' cannot be empty (i.e. its length must be >= 0).") + else: + for index, el in enumerate(gene_space): + if type(el) in [list, tuple, range, numpy.ndarray]: + if len(el) == 0: + self.valid_parameters = False + self.logger.error("The element indexed {index} of 'gene_space' with type {el_type} cannot be empty (i.e. its length must be >= 0).".format(index=index, el_type=type(el))) + raise ValueError("The element indexed {index} of 'gene_space' with type {el_type} cannot be empty (i.e. 
its length must be >= 0).".format(index=index, el_type=type(el))) + else: + for val in el: + if not (type(val) in [type(None)] + GA.supported_int_float_types): + self.logger.error("All values in the sublists inside the 'gene_space' attribute must be numeric of type int/float/None but ({val}) of type {typ} found.".format(val=val, typ=type(val))) + raise TypeError("All values in the sublists inside the 'gene_space' attribute must be numeric of type int/float/None but ({val}) of type {typ} found.".format(val=val, typ=type(val))) + self.gene_space_nested = True + elif type(el) == type(None): + pass + # self.gene_space_nested = True + elif type(el) is dict: + if len(el.items()) == 2: + if ('low' in el.keys()) and ('high' in el.keys()): + pass + else: + self.valid_parameters = False + self.logger.error("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) + raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) + elif len(el.items()) == 3: + if ('low' in el.keys()) and ('high' in el.keys()) and ('step' in el.keys()): + pass + else: + self.valid_parameters = False + self.logger.error("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) + raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) + else: + self.valid_parameters = False + self.logger.error("When an element in the 
'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(el.items()))) + raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(el.items()))) + self.gene_space_nested = True + elif not (type(el) in GA.supported_int_float_types): + self.valid_parameters = False + self.logger.error("Unexpected type {el_type} for the element indexed {index} of 'gene_space'. The accepted types are list/tuple/range/numpy.ndarray of numbers, a single number (int/float), or None.".format(index=index, el_type=type(el))) + raise TypeError("Unexpected type {el_type} for the element indexed {index} of 'gene_space'. The accepted types are list/tuple/range/numpy.ndarray of numbers, a single number (int/float), or None.".format(index=index, el_type=type(el))) + + elif type(gene_space) is dict: + if len(gene_space.items()) == 2: + if ('low' in gene_space.keys()) and ('high' in gene_space.keys()): + pass + else: + self.valid_parameters = False + self.logger.error("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) + raise ValueError("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) + elif len(gene_space.items()) == 3: + if ('low' in gene_space.keys()) and ('high' in gene_space.keys()) and ('step' in gene_space.keys()): + pass + else: + self.valid_parameters = False + self.logger.error("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: 
{gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) + raise ValueError("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) + else: + self.valid_parameters = False + self.logger.error("When the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(gene_space.items()))) + raise ValueError("When the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(gene_space.items()))) + + else: + self.valid_parameters = False + self.logger.error("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) + raise TypeError("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) + + self.gene_space = gene_space + + # Validate init_range_low and init_range_high + if type(init_range_low) in GA.supported_int_float_types: + if type(init_range_high) in GA.supported_int_float_types: + self.init_range_low = init_range_low + self.init_range_high = init_range_high + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'init_range_high' parameter must be either integer or floating-point number but the value ({init_range_high_value}) of type {init_range_high_type} found.".format(init_range_high_value=init_range_high, init_range_high_type=type(init_range_high))) + raise ValueError("The value passed to the 'init_range_high' parameter must be either integer or floating-point number but the value ({init_range_high_value}) of type {init_range_high_type} found.".format(init_range_high_value=init_range_high, init_range_high_type=type(init_range_high))) + else: + 
self.valid_parameters = False + self.logger.error("The value passed to the 'init_range_low' parameter must be either integer or floating-point number but the value ({init_range_low_value}) of type {init_range_low_type} found.".format(init_range_low_value=init_range_low, init_range_low_type=type(init_range_low))) + raise ValueError("The value passed to the 'init_range_low' parameter must be either integer or floating-point number but the value ({init_range_low_value}) of type {init_range_low_type} found.".format(init_range_low_value=init_range_low, init_range_low_type=type(init_range_low))) + + # Validate random_mutation_min_val and random_mutation_max_val + if type(random_mutation_min_val) in GA.supported_int_float_types: + if type(random_mutation_max_val) in GA.supported_int_float_types: + if random_mutation_min_val == random_mutation_max_val: + if not self.suppress_warnings: warnings.warn("The values of the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val' are equal and this causes a fixed change to all genes.") + else: + self.valid_parameters = False + self.logger.error("The expected type of the 'random_mutation_max_val' parameter is numeric but {random_mutation_max_val_type} found.".format(random_mutation_max_val_type=type(random_mutation_max_val))) + raise TypeError("The expected type of the 'random_mutation_max_val' parameter is numeric but {random_mutation_max_val_type} found.".format(random_mutation_max_val_type=type(random_mutation_max_val))) + else: + self.valid_parameters = False + self.logger.error("The expected type of the 'random_mutation_min_val' parameter is numeric but {random_mutation_min_val_type} found.".format(random_mutation_min_val_type=type(random_mutation_min_val))) + raise TypeError("The expected type of the 'random_mutation_min_val' parameter is numeric but {random_mutation_min_val_type} found.".format(random_mutation_min_val_type=type(random_mutation_min_val))) + self.random_mutation_min_val = random_mutation_min_val + 
self.random_mutation_max_val = random_mutation_max_val + + # Validate gene_type + if gene_type in GA.supported_int_float_types: + self.gene_type = [gene_type, None] + self.gene_type_single = True + # A single data type of float with precision. + elif len(gene_type) == 2 and gene_type[0] in GA.supported_float_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): + self.gene_type = gene_type + self.gene_type_single = True + elif type(gene_type) in [list, tuple, numpy.ndarray]: + if num_genes is None: + if initial_population is None: + self.valid_parameters = False + self.logger.error("When the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.") + raise TypeError("When the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.") + elif not len(gene_type) == len(initial_population[0]): + self.valid_parameters = False + self.logger.error("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the number of genes parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != number of genes ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=len(initial_population[0]))) + raise ValueError("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the number of genes parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != number of genes ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=len(initial_population[0]))) + elif not len(gene_type) == num_genes: + self.valid_parameters = False + self.logger.error("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the value passed to the 'num_genes' parameter. 
Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != len(num_genes) ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=num_genes)) + raise ValueError("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the value passed to the 'num_genes' parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != len(num_genes) ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=num_genes)) + for gene_type_idx, gene_type_val in enumerate(gene_type): + if gene_type_val in GA.supported_float_types: + # If the gene type is float and no precision is passed, set it to None. + gene_type[gene_type_idx] = [gene_type_val, None] + elif gene_type_val in GA.supported_int_types: + gene_type[gene_type_idx] = [gene_type_val, None] + elif type(gene_type_val) in [list, tuple, numpy.ndarray]: + # A float type is expected in a list/tuple/numpy.ndarray of length 2. 
+ if len(gene_type_val) == 2: + if gene_type_val[0] in GA.supported_float_types: + if type(gene_type_val[1]) in GA.supported_int_types: + pass + else: + self.valid_parameters = False + self.logger.error("In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_precision_val} with type {gene_type_type}.".format(gene_type_val=gene_type_val, gene_type_precision_val=gene_type_val[1], gene_type_type=gene_type_val[0], gene_type_idx=gene_type_idx)) + raise TypeError("In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_precision_val} with type {gene_type_type}.".format(gene_type_val=gene_type_val, gene_type_precision_val=gene_type_val[1], gene_type_type=gene_type_val[0], gene_type_idx=gene_type_idx)) + else: + self.valid_parameters = False + self.logger.error("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) + raise TypeError("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) + else: + self.valid_parameters = False + self.logger.error("In the 'gene_type' parameter, a precision is specified in a list/tuple/numpy.ndarray of length 2 but value ({gene_type_val}) of type {gene_type_type} with length {gene_type_length} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx, 
gene_type_length=len(gene_type_val))) + raise ValueError("In the 'gene_type' parameter, a precision is specified in a list/tuple/numpy.ndarray of length 2 but value ({gene_type_val}) of type {gene_type_type} with length {gene_type_length} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx, gene_type_length=len(gene_type_val))) + else: + self.valid_parameters = False + self.logger.error("When a list/tuple/numpy.ndarray is assigned to the 'gene_type' parameter, then its elements must be of integer, floating-point, list, tuple, or numpy.ndarray data types but the value ({gene_type_val}) of type {gene_type_type} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx)) + raise ValueError("When a list/tuple/numpy.ndarray is assigned to the 'gene_type' parameter, then its elements must be of integer, floating-point, list, tuple, or numpy.ndarray data types but the value ({gene_type_val}) of type {gene_type_type} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx)) + self.gene_type = gene_type + self.gene_type_single = False + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) + raise ValueError("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) + + # Build the initial population + if initial_population is None: + if (sol_per_pop is None) or (num_genes is None): + self.valid_parameters = False + 
self.logger.error("Error creating the initial population:\n\nWhen the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.\nThere are 2 options to prepare the initial population:\n1) Assinging the initial population to the 'initial_population' parameter. In this case, the values of the 2 parameters sol_per_pop and num_genes will be deduced.\n2) Assign integer values to the 'sol_per_pop' and 'num_genes' parameters so that PyGAD can create the initial population automatically.") + raise TypeError("Error creating the initial population:\n\nWhen the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.\nThere are 2 options to prepare the initial population:\n1) Assinging the initial population to the 'initial_population' parameter. In this case, the values of the 2 parameters sol_per_pop and num_genes will be deduced.\n2) Assign integer values to the 'sol_per_pop' and 'num_genes' parameters so that PyGAD can create the initial population automatically.") + elif (type(sol_per_pop) is int) and (type(num_genes) is int): + # Validating the number of solutions in the population (sol_per_pop) + if sol_per_pop <= 0: + self.valid_parameters = False + self.logger.error("The number of solutions in the population (sol_per_pop) must be > 0 but ({sol_per_pop}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(sol_per_pop=sol_per_pop)) + raise ValueError("The number of solutions in the population (sol_per_pop) must be > 0 but ({sol_per_pop}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(sol_per_pop=sol_per_pop)) + # Validating the number of gene. 
+ if (num_genes <= 0): + self.valid_parameters = False + self.logger.error("The number of genes cannot be <= 0 but ({num_genes}) found.\n".format(num_genes=num_genes)) + raise ValueError("The number of genes cannot be <= 0 but ({num_genes}) found.\n".format(num_genes=num_genes)) + # When initial_population=None and the 2 parameters sol_per_pop and num_genes have valid integer values, then the initial population is created. + # Inside the initialize_population() method, the initial_population attribute is assigned to keep the initial population accessible. + self.num_genes = num_genes # Number of genes in the solution. + + # In case the 'gene_space' parameter is nested, then make sure the number of its elements equals to the number of genes. + if self.gene_space_nested: + if len(gene_space) != self.num_genes: + self.valid_parameters = False + self.logger.error("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({num_genes})".format(len_gene_space=len(gene_space), num_genes=self.num_genes)) + raise ValueError("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({num_genes})".format(len_gene_space=len(gene_space), num_genes=self.num_genes)) + + self.sol_per_pop = sol_per_pop # Number of solutions in the population. 
+ self.initialize_population(self.init_range_low, + self.init_range_high, + allow_duplicate_genes, + True, + self.gene_type) + else: + self.valid_parameters = False + self.logger.error("The expected type of both the sol_per_pop and num_genes parameters is int but {sol_per_pop_type} and {num_genes_type} found.".format(sol_per_pop_type=type(sol_per_pop), num_genes_type=type(num_genes))) + raise TypeError("The expected type of both the sol_per_pop and num_genes parameters is int but {sol_per_pop_type} and {num_genes_type} found.".format(sol_per_pop_type=type(sol_per_pop), num_genes_type=type(num_genes))) + elif not type(initial_population) in [list, tuple, numpy.ndarray]: + self.valid_parameters = False + self.logger.error("The value assigned to the 'initial_population' parameter is expected to by of type list, tuple, or ndarray but {initial_population_type} found.".format(initial_population_type=type(initial_population))) + raise TypeError("The value assigned to the 'initial_population' parameter is expected to by of type list, tuple, or ndarray but {initial_population_type} found.".format(initial_population_type=type(initial_population))) + elif numpy.array(initial_population).ndim != 2: + self.valid_parameters = False + self.logger.error("A 2D list is expected to the initail_population parameter but a ({initial_population_ndim}-D) list found.".format(initial_population_ndim=numpy.array(initial_population).ndim)) + raise ValueError("A 2D list is expected to the initail_population parameter but a ({initial_population_ndim}-D) list found.".format(initial_population_ndim=numpy.array(initial_population).ndim)) + else: + # Validate the type of each value in the 'initial_population' parameter. 
+ for row_idx in range(len(initial_population)): + for col_idx in range(len(initial_population[0])): + if type(initial_population[row_idx][col_idx]) in GA.supported_int_float_types: + pass + else: + self.valid_parameters = False + self.logger.error("The values in the initial population can be integers or floats but the value ({value}) of type {value_type} found.".format(value=initial_population[row_idx][col_idx], value_type=type(initial_population[row_idx][col_idx]))) + raise TypeError("The values in the initial population can be integers or floats but the value ({value}) of type {value_type} found.".format(value=initial_population[row_idx][col_idx], value_type=type(initial_population[row_idx][col_idx]))) + + # Forcing the initial_population array to have the data type assigned to the gene_type parameter. + if self.gene_type_single == True: + if self.gene_type[1] == None: + self.initial_population = numpy.array(initial_population, dtype=self.gene_type[0]) + else: + self.initial_population = numpy.round(numpy.array(initial_population, dtype=self.gene_type[0]), self.gene_type[1]) + else: + initial_population = numpy.array(initial_population) + self.initial_population = numpy.zeros(shape=(initial_population.shape[0], initial_population.shape[1]), dtype=object) + for gene_idx in range(initial_population.shape[1]): + if self.gene_type[gene_idx][1] is None: + self.initial_population[:, gene_idx] = numpy.asarray(initial_population[:, gene_idx], + dtype=self.gene_type[gene_idx][0]) + else: + self.initial_population[:, gene_idx] = numpy.round(numpy.asarray(initial_population[:, gene_idx], + dtype=self.gene_type[gene_idx][0]), + self.gene_type[gene_idx][1]) + + self.population = self.initial_population.copy() # A NumPy array holding the initial population. + self.num_genes = self.initial_population.shape[1] # Number of genes in the solution. + self.sol_per_pop = self.initial_population.shape[0] # Number of solutions in the population. 
+ self.pop_size = (self.sol_per_pop,self.num_genes) # The population size. + + # Round initial_population and population + self.initial_population = self.round_genes(self.initial_population) + self.population = self.round_genes(self.population) + + # In case the 'gene_space' parameter is nested, then make sure the number of its elements equals to the number of genes. + if self.gene_space_nested: + if len(gene_space) != self.num_genes: + self.valid_parameters = False + self.logger.error("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({len_num_genes})".format(len_gene_space=len(gene_space), len_num_genes=self.num_genes)) + raise ValueError("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({len_num_genes})".format(len_gene_space=len(gene_space), len_num_genes=self.num_genes)) + + # Validating the number of parents to be selected for mating (num_parents_mating) + if num_parents_mating <= 0: + self.valid_parameters = False + self.logger.error("The number of parents mating (num_parents_mating) parameter must be > 0 but ({num_parents_mating}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(num_parents_mating=num_parents_mating)) + raise ValueError("The number of parents mating (num_parents_mating) parameter must be > 0 but ({num_parents_mating}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. 
number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(num_parents_mating=num_parents_mating)) + + # Validating the number of parents to be selected for mating: num_parents_mating + if (num_parents_mating > self.sol_per_pop): + self.valid_parameters = False + self.logger.error("The number of parents to select for mating ({num_parents_mating}) cannot be greater than the number of solutions in the population ({sol_per_pop}) (i.e., num_parents_mating must always be <= sol_per_pop).\n".format(num_parents_mating=num_parents_mating, sol_per_pop=self.sol_per_pop)) + raise ValueError("The number of parents to select for mating ({num_parents_mating}) cannot be greater than the number of solutions in the population ({sol_per_pop}) (i.e., num_parents_mating must always be <= sol_per_pop).\n".format(num_parents_mating=num_parents_mating, sol_per_pop=self.sol_per_pop)) + + self.num_parents_mating = num_parents_mating + + # crossover: Refers to the method that applies the crossover operator based on the selected type of crossover in the crossover_type property. + # Validating the crossover type: crossover_type + if (crossover_type is None): + self.crossover = None + elif inspect.ismethod(crossover_type): + # Check if the crossover_type is a method that accepts 4 paramaters. + if (crossover_type.__code__.co_argcount == 4): + # The crossover method assigned to the crossover_type parameter is validated. 
+ self.crossover = crossover_type + else: + self.valid_parameters = False + self.logger.error("When 'crossover_type' is assigned to a method, then this crossover method must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The selected parents.\n3) The size of the offspring to be produced.\n4) The instance from the pygad.GA class.\n\nThe passed crossover method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) + raise ValueError("When 'crossover_type' is assigned to a method, then this crossover method must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The selected parents.\n3) The size of the offspring to be produced.\n4) The instance from the pygad.GA class.\n\nThe passed crossover method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) + elif callable(crossover_type): + # Check if the crossover_type is a function that accepts 2 paramaters. + if (crossover_type.__code__.co_argcount == 3): + # The crossover function assigned to the crossover_type parameter is validated. 
+ self.crossover = crossover_type + else: + self.valid_parameters = False + self.logger.error("When 'crossover_type' is assigned to a function, then this crossover function must accept 3 parameters:\n1) The selected parents.\n2) The size of the offspring to be produced.3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed crossover function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) + raise ValueError("When 'crossover_type' is assigned to a function, then this crossover function must accept 3 parameters:\n1) The selected parents.\n2) The size of the offspring to be produced.3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed crossover function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) + elif not (type(crossover_type) is str): + self.valid_parameters = False + self.logger.error("The expected type of the 'crossover_type' parameter is either callable or str but {crossover_type} found.".format(crossover_type=type(crossover_type))) + raise TypeError("The expected type of the 'crossover_type' parameter is either callable or str but {crossover_type} found.".format(crossover_type=type(crossover_type))) + else: # type crossover_type is str + crossover_type = crossover_type.lower() + if (crossover_type == "single_point"): + self.crossover = self.single_point_crossover + elif (crossover_type == "two_points"): + self.crossover = self.two_points_crossover + elif (crossover_type == "uniform"): + self.crossover = self.uniform_crossover + elif (crossover_type == "scattered"): + self.crossover = self.scattered_crossover + else: + self.valid_parameters = False + self.logger.error("Undefined crossover type. 
\nThe assigned value to the crossover_type ({crossover_type}) parameter does not refer to one of the supported crossover types which are: \n-single_point (for single point crossover)\n-two_points (for two points crossover)\n-uniform (for uniform crossover)\n-scattered (for scattered crossover).\n".format(crossover_type=crossover_type)) + raise TypeError("Undefined crossover type. \nThe assigned value to the crossover_type ({crossover_type}) parameter does not refer to one of the supported crossover types which are: \n-single_point (for single point crossover)\n-two_points (for two points crossover)\n-uniform (for uniform crossover)\n-scattered (for scattered crossover).\n".format(crossover_type=crossover_type)) + + self.crossover_type = crossover_type + + # Calculate the value of crossover_probability + if crossover_probability is None: + self.crossover_probability = None + elif type(crossover_probability) in GA.supported_int_float_types: + if crossover_probability >= 0 and crossover_probability <= 1: + self.crossover_probability = crossover_probability + else: + self.valid_parameters = False + self.logger.error("The value assigned to the 'crossover_probability' parameter must be between 0 and 1 inclusive but ({crossover_probability_value}) found.".format(crossover_probability_value=crossover_probability)) + raise ValueError("The value assigned to the 'crossover_probability' parameter must be between 0 and 1 inclusive but ({crossover_probability_value}) found.".format(crossover_probability_value=crossover_probability)) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for the 'crossover_probability' parameter. Float is expected but ({crossover_probability_value}) of type {crossover_probability_type} found.".format(crossover_probability_value=crossover_probability, crossover_probability_type=type(crossover_probability))) + raise TypeError("Unexpected type for the 'crossover_probability' parameter. 
Float is expected but ({crossover_probability_value}) of type {crossover_probability_type} found.".format(crossover_probability_value=crossover_probability, crossover_probability_type=type(crossover_probability))) + + # mutation: Refers to the method that applies the mutation operator based on the selected type of mutation in the mutation_type property. + # Validating the mutation type: mutation_type + # "adaptive" mutation is supported starting from PyGAD 2.10.0 + if mutation_type is None: + self.mutation = None + elif inspect.ismethod(mutation_type): + # Check if the mutation_type is a method that accepts 3 paramater. + if (mutation_type.__code__.co_argcount == 3): + # The mutation method assigned to the mutation_type parameter is validated. + self.mutation = mutation_type + else: + self.valid_parameters = False + self.logger.error("When 'mutation_type' is assigned to a method, then it must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The offspring to be mutated.\n3) The instance from the pygad.GA class.\n\nThe passed mutation method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) + raise ValueError("When 'mutation_type' is assigned to a method, then it must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The offspring to be mutated.\n3) The instance from the pygad.GA class.\n\nThe passed mutation method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) + elif callable(mutation_type): + # Check if the mutation_type is a function that accepts 2 paramater. + if (mutation_type.__code__.co_argcount == 2): + # The mutation function assigned to the mutation_type parameter is validated. 
+ self.mutation = mutation_type + else: + self.valid_parameters = False + self.logger.error("When 'mutation_type' is assigned to a function, then this mutation function must accept 2 parameters:\n1) The offspring to be mutated.\n2) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed mutation function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) + raise ValueError("When 'mutation_type' is assigned to a function, then this mutation function must accept 2 parameters:\n1) The offspring to be mutated.\n2) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed mutation function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) + elif not (type(mutation_type) is str): + self.valid_parameters = False + self.logger.error("The expected type of the 'mutation_type' parameter is either callable or str but {mutation_type} found.".format(mutation_type=type(mutation_type))) + raise TypeError("The expected type of the 'mutation_type' parameter is either callable or str but {mutation_type} found.".format(mutation_type=type(mutation_type))) + else: # type mutation_type is str + mutation_type = mutation_type.lower() + if (mutation_type == "random"): + self.mutation = self.random_mutation + elif (mutation_type == "swap"): + self.mutation = self.swap_mutation + elif (mutation_type == "scramble"): + self.mutation = self.scramble_mutation + elif (mutation_type == "inversion"): + self.mutation = self.inversion_mutation + elif (mutation_type == "adaptive"): + self.mutation = self.adaptive_mutation + else: + self.valid_parameters = False + self.logger.error("Undefined mutation type. 
\nThe assigned string value to the 'mutation_type' parameter ({mutation_type}) does not refer to one of the supported mutation types which are: \n-random (for random mutation)\n-swap (for swap mutation)\n-inversion (for inversion mutation)\n-scramble (for scramble mutation)\n-adaptive (for adaptive mutation).\n".format(mutation_type=mutation_type)) + raise TypeError("Undefined mutation type. \nThe assigned string value to the 'mutation_type' parameter ({mutation_type}) does not refer to one of the supported mutation types which are: \n-random (for random mutation)\n-swap (for swap mutation)\n-inversion (for inversion mutation)\n-scramble (for scramble mutation)\n-adaptive (for adaptive mutation).\n".format(mutation_type=mutation_type)) + + self.mutation_type = mutation_type + + # Calculate the value of mutation_probability + if not (self.mutation_type is None): + if mutation_probability is None: + self.mutation_probability = None + elif (mutation_type != "adaptive"): + # Mutation probability is fixed not adaptive. + if type(mutation_probability) in GA.supported_int_float_types: + if mutation_probability >= 0 and mutation_probability <= 1: + self.mutation_probability = mutation_probability + else: + self.valid_parameters = False + self.logger.error("The value assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=mutation_probability)) + raise ValueError("The value assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=mutation_probability)) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for the 'mutation_probability' parameter. 
A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) + raise TypeError("Unexpected type for the 'mutation_probability' parameter. A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) + else: + # Mutation probability is adaptive not fixed. + if type(mutation_probability) in [list, tuple, numpy.ndarray]: + if len(mutation_probability) == 2: + for el in mutation_probability: + if type(el) in GA.supported_int_float_types: + if el >= 0 and el <= 1: + pass + else: + self.valid_parameters = False + self.logger.error("The values assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=el)) + raise ValueError("The values assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=el)) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for a value assigned to the 'mutation_probability' parameter. A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=el, mutation_probability_type=type(el))) + raise TypeError("Unexpected type for a value assigned to the 'mutation_probability' parameter. 
A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=el, mutation_probability_type=type(el))) + if mutation_probability[0] < mutation_probability[1]: + if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_probability' parameter is {first_el} which is smaller than the second element {second_el}. This means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions. Please make the first element higher than the second element.".format(first_el=mutation_probability[0], second_el=mutation_probability[1])) + self.mutation_probability = mutation_probability + else: + self.valid_parameters = False + self.logger.error("When mutation_type='adaptive', then the 'mutation_probability' parameter must have only 2 elements but ({mutation_probability_length}) element(s) found.".format(mutation_probability_length=len(mutation_probability))) + raise ValueError("When mutation_type='adaptive', then the 'mutation_probability' parameter must have only 2 elements but ({mutation_probability_length}) element(s) found.".format(mutation_probability_length=len(mutation_probability))) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for the 'mutation_probability' parameter. When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) + raise TypeError("Unexpected type for the 'mutation_probability' parameter. 
When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) + else: + pass + + # Calculate the value of mutation_num_genes + if not (self.mutation_type is None): + if mutation_num_genes is None: + # The mutation_num_genes parameter does not exist. Checking whether adaptive mutation is used. + if (mutation_type != "adaptive"): + # The percent of genes to mutate is fixed not adaptive. + if mutation_percent_genes == 'default'.lower(): + mutation_percent_genes = 10 + # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. + mutation_num_genes = numpy.uint32((mutation_percent_genes*self.num_genes)/100) + # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. + if mutation_num_genes == 0: + if self.mutation_probability is None: + if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate (mutation_percent_genes={mutation_percent}) resutled in selecting ({mutation_num}) genes. 
The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes, mutation_num=mutation_num_genes)) + mutation_num_genes = 1 + + elif type(mutation_percent_genes) in GA.supported_int_float_types: + if (mutation_percent_genes <= 0 or mutation_percent_genes > 100): + self.valid_parameters = False + self.logger.error("The percentage of selected genes for mutation (mutation_percent_genes) must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) + raise ValueError("The percentage of selected genes for mutation (mutation_percent_genes) must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) + else: + # If mutation_percent_genes equals the string "default", then it is replaced by the numeric value 10. + if mutation_percent_genes == 'default'.lower(): + mutation_percent_genes = 10 + + # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. + mutation_num_genes = numpy.uint32((mutation_percent_genes*self.num_genes)/100) + # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. + if mutation_num_genes == 0: + if self.mutation_probability is None: + if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate (mutation_percent_genes={mutation_percent}) resutled in selecting ({mutation_num}) genes. 
The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes, mutation_num=mutation_num_genes)) + mutation_num_genes = 1 + else: + self.valid_parameters = False + self.logger.error("Unexpected value or type of the 'mutation_percent_genes' parameter. It only accepts the string 'default' or a numeric value but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=mutation_percent_genes, mutation_percent_genes_type=type(mutation_percent_genes))) + raise TypeError("Unexpected value or type of the 'mutation_percent_genes' parameter. It only accepts the string 'default' or a numeric value but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=mutation_percent_genes, mutation_percent_genes_type=type(mutation_percent_genes))) + else: + # The percent of genes to mutate is adaptive not fixed. + if type(mutation_percent_genes) in [list, tuple, numpy.ndarray]: + if len(mutation_percent_genes) == 2: + mutation_num_genes = numpy.zeros_like(mutation_percent_genes, dtype=numpy.uint32) + for idx, el in enumerate(mutation_percent_genes): + if type(el) in GA.supported_int_float_types: + if (el <= 0 or el > 100): + self.valid_parameters = False + self.logger.error("The values assigned to the 'mutation_percent_genes' must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) + raise ValueError("The values assigned to the 'mutation_percent_genes' must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for a value assigned to the 'mutation_percent_genes' parameter. 
An integer value is expected but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=el, mutation_percent_genes_type=type(el))) + raise TypeError("Unexpected type for a value assigned to the 'mutation_percent_genes' parameter. An integer value is expected but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=el, mutation_percent_genes_type=type(el))) + # At this point of the loop, the current value assigned to the parameter 'mutation_percent_genes' is validated. + # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. + mutation_num_genes[idx] = numpy.uint32((mutation_percent_genes[idx]*self.num_genes)/100) + # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. + if mutation_num_genes[idx] == 0: + if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate ({mutation_percent}) resutled in selecting ({mutation_num}) genes. The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes[idx], mutation_num=mutation_num_genes[idx])) + mutation_num_genes[idx] = 1 + if mutation_percent_genes[0] < mutation_percent_genes[1]: + if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_percent_genes' parameter is ({first_el}) which is smaller than the second element ({second_el}).\nThis means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. 
This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions.\nPlease make the first element higher than the second element.".format(first_el=mutation_percent_genes[0], second_el=mutation_percent_genes[1])) + # At this point outside the loop, all values of the parameter 'mutation_percent_genes' are validated. Eveyrthing is OK. + else: + self.valid_parameters = False + self.logger.error("When mutation_type='adaptive', then the 'mutation_percent_genes' parameter must have only 2 elements but ({mutation_percent_genes_length}) element(s) found.".format(mutation_percent_genes_length=len(mutation_percent_genes))) + raise ValueError("When mutation_type='adaptive', then the 'mutation_percent_genes' parameter must have only 2 elements but ({mutation_percent_genes_length}) element(s) found.".format(mutation_percent_genes_length=len(mutation_percent_genes))) + else: + if self.mutation_probability is None: + self.valid_parameters = False + self.logger.error("Unexpected type of the 'mutation_percent_genes' parameter. When mutation_type='adaptive', then the 'mutation_percent_genes' parameter should exist and assigned a list/tuple/numpy.ndarray with 2 values but ({mutation_percent_genes_value}) found.".format(mutation_percent_genes_value=mutation_percent_genes)) + raise TypeError("Unexpected type of the 'mutation_percent_genes' parameter. When mutation_type='adaptive', then the 'mutation_percent_genes' parameter should exist and assigned a list/tuple/numpy.ndarray with 2 values but ({mutation_percent_genes_value}) found.".format(mutation_percent_genes_value=mutation_percent_genes)) + # The mutation_num_genes parameter exists. Checking whether adaptive mutation is used. + elif (mutation_type != "adaptive"): + # Number of genes to mutate is fixed not adaptive. 
+ if type(mutation_num_genes) in GA.supported_int_types: + if (mutation_num_genes <= 0): + self.valid_parameters = False + self.logger.error("The number of selected genes for mutation (mutation_num_genes) cannot be <= 0 but ({mutation_num_genes}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes=mutation_num_genes)) + raise ValueError("The number of selected genes for mutation (mutation_num_genes) cannot be <= 0 but ({mutation_num_genes}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes=mutation_num_genes)) + elif (mutation_num_genes > self.num_genes): + self.valid_parameters = False + self.logger.error("The number of selected genes for mutation (mutation_num_genes), which is ({mutation_num_genes}), cannot be greater than the number of genes ({num_genes}).\n".format(mutation_num_genes=mutation_num_genes, num_genes=self.num_genes)) + raise ValueError("The number of selected genes for mutation (mutation_num_genes), which is ({mutation_num_genes}), cannot be greater than the number of genes ({num_genes}).\n".format(mutation_num_genes=mutation_num_genes, num_genes=self.num_genes)) + else: + self.valid_parameters = False + self.logger.error("The 'mutation_num_genes' parameter is expected to be a positive integer but the value ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.\n".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) + raise TypeError("The 'mutation_num_genes' parameter is expected to be a positive integer but the value ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.\n".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) + else: + # Number of genes to mutate is adaptive not fixed. 
+ if type(mutation_num_genes) in [list, tuple, numpy.ndarray]: + if len(mutation_num_genes) == 2: + for el in mutation_num_genes: + if type(el) in GA.supported_int_types: + if (el <= 0): + self.valid_parameters = False + self.logger.error("The values assigned to the 'mutation_num_genes' cannot be <= 0 but ({mutation_num_genes_value}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes_value=el)) + raise ValueError("The values assigned to the 'mutation_num_genes' cannot be <= 0 but ({mutation_num_genes_value}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes_value=el)) + elif (el > self.num_genes): + self.valid_parameters = False + self.logger.error("The values assigned to the 'mutation_num_genes' cannot be greater than the number of genes ({num_genes}) but ({mutation_num_genes_value}) found.\n".format(mutation_num_genes_value=el, num_genes=self.num_genes)) + raise ValueError("The values assigned to the 'mutation_num_genes' cannot be greater than the number of genes ({num_genes}) but ({mutation_num_genes_value}) found.\n".format(mutation_num_genes_value=el, num_genes=self.num_genes)) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for a value assigned to the 'mutation_num_genes' parameter. An integer value is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=el, mutation_num_genes_type=type(el))) + raise TypeError("Unexpected type for a value assigned to the 'mutation_num_genes' parameter. An integer value is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=el, mutation_num_genes_type=type(el))) + # At this point of the loop, the current value assigned to the parameter 'mutation_num_genes' is validated. 
+ if mutation_num_genes[0] < mutation_num_genes[1]: + if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_num_genes' parameter is {first_el} which is smaller than the second element {second_el}. This means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions. Please make the first element higher than the second element.".format(first_el=mutation_num_genes[0], second_el=mutation_num_genes[1])) + # At this point outside the loop, all values of the parameter 'mutation_num_genes' are validated. Eveyrthing is OK. + else: + self.valid_parameters = False + self.logger.error("When mutation_type='adaptive', then the 'mutation_num_genes' parameter must have only 2 elements but ({mutation_num_genes_length}) element(s) found.".format(mutation_num_genes_length=len(mutation_num_genes))) + raise ValueError("When mutation_type='adaptive', then the 'mutation_num_genes' parameter must have only 2 elements but ({mutation_num_genes_length}) element(s) found.".format(mutation_num_genes_length=len(mutation_num_genes))) + else: + self.valid_parameters = False + self.logger.error("Unexpected type for the 'mutation_num_genes' parameter. When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) + raise TypeError("Unexpected type for the 'mutation_num_genes' parameter. 
When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) + else: + pass + + # Validating mutation_by_replacement and mutation_type + if self.mutation_type != "random" and self.mutation_by_replacement: + if not self.suppress_warnings: warnings.warn("The mutation_by_replacement parameter is set to True while the mutation_type parameter is not set to random but ({mut_type}). Note that the mutation_by_replacement parameter has an effect only when mutation_type='random'.".format(mut_type=mutation_type)) + + # Check if crossover and mutation are both disabled. + if (self.mutation_type is None) and (self.crossover_type is None): + if not self.suppress_warnings: warnings.warn("The 2 parameters mutation_type and crossover_type are None. This disables any type of evolution the genetic algorithm can make. As a result, the genetic algorithm cannot find a better solution that the best solution in the initial population.") + + # select_parents: Refers to a method that selects the parents based on the parent selection type specified in the parent_selection_type attribute. + # Validating the selected type of parent selection: parent_selection_type + if inspect.ismethod(parent_selection_type): + # Check if the parent_selection_type is a method that accepts 4 paramaters. + if (parent_selection_type.__code__.co_argcount == 4): + # population: Added in PyGAD 2.16.0. It should used only to support custom parent selection functions. Otherwise, it should be left to None to retirve the population by self.population. + # The parent selection method assigned to the parent_selection_type parameter is validated. 
+ self.select_parents = parent_selection_type + else: + self.valid_parameters = False + self.logger.error("When 'parent_selection_type' is assigned to a method, then it must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The fitness values of the current population.\n3) The number of parents needed.\n4) The instance from the pygad.GA class.\n\nThe passed parent selection method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) + raise ValueError("When 'parent_selection_type' is assigned to a method, then it must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The fitness values of the current population.\n3) The number of parents needed.\n4) The instance from the pygad.GA class.\n\nThe passed parent selection method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) + elif callable(parent_selection_type): + # Check if the parent_selection_type is a function that accepts 3 paramaters. + if (parent_selection_type.__code__.co_argcount == 3): + # population: Added in PyGAD 2.16.0. It should used only to support custom parent selection functions. Otherwise, it should be left to None to retirve the population by self.population. + # The parent selection function assigned to the parent_selection_type parameter is validated. 
+ self.select_parents = parent_selection_type + else: + self.valid_parameters = False + self.logger.error("When 'parent_selection_type' is assigned to a user-defined function, then this parent selection function must accept 3 parameters:\n1) The fitness values of the current population.\n2) The number of parents needed.\n3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed parent selection function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) + raise ValueError("When 'parent_selection_type' is assigned to a user-defined function, then this parent selection function must accept 3 parameters:\n1) The fitness values of the current population.\n2) The number of parents needed.\n3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed parent selection function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) + elif not (type(parent_selection_type) is str): + self.valid_parameters = False + self.logger.error("The expected type of the 'parent_selection_type' parameter is either callable or str but {parent_selection_type} found.".format(parent_selection_type=type(parent_selection_type))) + raise TypeError("The expected type of the 'parent_selection_type' parameter is either callable or str but {parent_selection_type} found.".format(parent_selection_type=type(parent_selection_type))) + else: + parent_selection_type = parent_selection_type.lower() + if (parent_selection_type == "sss"): + self.select_parents = self.steady_state_selection + elif (parent_selection_type == "rws"): + self.select_parents = self.roulette_wheel_selection + elif (parent_selection_type == "sus"): + self.select_parents = 
self.stochastic_universal_selection + elif (parent_selection_type == "random"): + self.select_parents = self.random_selection + elif (parent_selection_type == "tournament"): + self.select_parents = self.tournament_selection + elif (parent_selection_type == "rank"): + self.select_parents = self.rank_selection + else: + self.valid_parameters = False + self.logger.error("Undefined parent selection type: {parent_selection_type}. \nThe assigned value to the 'parent_selection_type' parameter does not refer to one of the supported parent selection techniques which are: \n-sss (for steady state selection)\n-rws (for roulette wheel selection)\n-sus (for stochastic universal selection)\n-rank (for rank selection)\n-random (for random selection)\n-tournament (for tournament selection).\n".format(parent_selection_type=parent_selection_type)) + raise TypeError("Undefined parent selection type: {parent_selection_type}. \nThe assigned value to the 'parent_selection_type' parameter does not refer to one of the supported parent selection techniques which are: \n-sss (for steady state selection)\n-rws (for roulette wheel selection)\n-sus (for stochastic universal selection)\n-rank (for rank selection)\n-random (for random selection)\n-tournament (for tournament selection).\n".format(parent_selection_type=parent_selection_type)) + + # For tournament selection, validate the K value. 
+ if(parent_selection_type == "tournament"): + if (K_tournament > self.sol_per_pop): + K_tournament = self.sol_per_pop + if not self.suppress_warnings: warnings.warn("K of the tournament selection ({K_tournament}) should not be greater than the number of solutions within the population ({sol_per_pop}).\nK will be clipped to be equal to the number of solutions in the population (sol_per_pop).\n".format(K_tournament=K_tournament, sol_per_pop=self.sol_per_pop)) + elif (K_tournament <= 0): + self.valid_parameters = False + self.logger.error("K of the tournament selection cannot be <=0 but ({K_tournament}) found.\n".format(K_tournament=K_tournament)) + raise ValueError("K of the tournament selection cannot be <=0 but ({K_tournament}) found.\n".format(K_tournament=K_tournament)) + + self.K_tournament = K_tournament + + # Validating the number of parents to keep in the next population: keep_parents + if not (type(keep_parents) in GA.supported_int_types): + self.valid_parameters = False + self.logger.error("Incorrect type of the value assigned to the keep_parents parameter. The value ({keep_parents}) of type {keep_parents_type} found but an integer is expected.".format(keep_parents=keep_parents, keep_parents_type=type(keep_parents))) + raise TypeError("Incorrect type of the value assigned to the keep_parents parameter. The value ({keep_parents}) of type {keep_parents_type} found but an integer is expected.".format(keep_parents=keep_parents, keep_parents_type=type(keep_parents))) + elif (keep_parents > self.sol_per_pop or keep_parents > self.num_parents_mating or keep_parents < -1): + self.valid_parameters = False + self.logger.error("Incorrect value to the keep_parents parameter: {keep_parents}. 
\nThe assigned value to the keep_parent parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Less than or equal to num_parents_mating\n3) Greater than or equal to -1.".format(keep_parents=keep_parents)) + raise ValueError("Incorrect value to the keep_parents parameter: {keep_parents}. \nThe assigned value to the keep_parent parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Less than or equal to num_parents_mating\n3) Greater than or equal to -1.".format(keep_parents=keep_parents)) + + self.keep_parents = keep_parents + + if parent_selection_type == "sss" and self.keep_parents == 0: + if not self.suppress_warnings: warnings.warn("The steady-state parent (sss) selection operator is used despite that no parents are kept in the next generation.") + + # Validating the number of elitism to keep in the next population: keep_elitism + if not (type(keep_elitism) in GA.supported_int_types): + self.valid_parameters = False + self.logger.error("Incorrect type of the value assigned to the keep_elitism parameter. The value ({keep_elitism}) of type {keep_elitism_type} found but an integer is expected.".format(keep_elitism=keep_elitism, keep_elitism_type=type(keep_elitism))) + raise TypeError("Incorrect type of the value assigned to the keep_elitism parameter. The value ({keep_elitism}) of type {keep_elitism_type} found but an integer is expected.".format(keep_elitism=keep_elitism, keep_elitism_type=type(keep_elitism))) + elif (keep_elitism > self.sol_per_pop or keep_elitism < 0): + self.valid_parameters = False + self.logger.error("Incorrect value to the keep_elitism parameter: {keep_elitism}. \nThe assigned value to the keep_elitism parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Greater than or equal to 0.".format(keep_elitism=keep_elitism)) + raise ValueError("Incorrect value to the keep_elitism parameter: {keep_elitism}. 
\nThe assigned value to the keep_elitism parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Greater than or equal to 0.".format(keep_elitism=keep_elitism)) + + self.keep_elitism = keep_elitism + + # Validate keep_parents. + if self.keep_elitism == 0: + if (self.keep_parents == -1): # Keep all parents in the next population. + self.num_offspring = self.sol_per_pop - self.num_parents_mating + elif (self.keep_parents == 0): # Keep no parents in the next population. + self.num_offspring = self.sol_per_pop + elif (self.keep_parents > 0): # Keep the specified number of parents in the next population. + self.num_offspring = self.sol_per_pop - self.keep_parents + else: + self.num_offspring = self.sol_per_pop - self.keep_elitism + + # Check if the fitness_func is a method. + # In PyGAD 2.19.0, a method can be passed to the fitness function. If function is passed, then it accepts 2 parameters. If method, then it accepts 3 parameters. + # In PyGAD 2.20.0, a new parameter is passed referring to the instance of the `pygad.GA` class. So, the function accepts 3 parameters and the method accepts 4 parameters. + if inspect.ismethod(fitness_func): + # If the fitness is calculated through a method, not a function, then there is a fourth 'self` paramaters. 
+ if (fitness_func.__code__.co_argcount == 4): + self.fitness_func = fitness_func + else: + self.valid_parameters = False + self.logger.error("In PyGAD 2.20.0, if a method is used to calculate the fitness value, then it must accept 4 parameters\n1) Expected to be the 'self' object.\n2) The instance of the 'pygad.GA' class.\n3) A solution to calculate its fitness value.\n4) The solution's index within the population.\n\nThe passed fitness method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) + raise ValueError("In PyGAD 2.20.0, if a method is used to calculate the fitness value, then it must accept 4 parameters\n1) Expected to be the 'self' object.\n2) The instance of the 'pygad.GA' class.\n3) A solution to calculate its fitness value.\n4) The solution's index within the population.\n\nThe passed fitness method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) + elif callable(fitness_func): + # Check if the fitness function accepts 2 paramaters. 
+ if (fitness_func.__code__.co_argcount == 3): + self.fitness_func = fitness_func + else: + self.valid_parameters = False + self.logger.error("In PyGAD 2.20.0, the fitness function must accept 3 parameters:\n1) The instance of the 'pygad.GA' class.\n2) A solution to calculate its fitness value.\n3) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) + raise ValueError("In PyGAD 2.20.0, the fitness function must accept 3 parameters:\n1) The instance of the 'pygad.GA' class.\n2) A solution to calculate its fitness value.\n3) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the fitness_func parameter is expected to be of type function but {fitness_func_type} found.".format(fitness_func_type=type(fitness_func))) + raise TypeError("The value assigned to the fitness_func parameter is expected to be of type function but {fitness_func_type} found.".format(fitness_func_type=type(fitness_func))) + + if fitness_batch_size is None: + pass + elif not (type(fitness_batch_size) in GA.supported_int_types): + self.valid_parameters = False + self.logger.error("The value assigned to the fitness_batch_size parameter is expected to be integer but the value ({fitness_batch_size}) of type {fitness_batch_size_type} found.".format(fitness_batch_size=fitness_batch_size, fitness_batch_size_type=type(fitness_batch_size))) + raise TypeError("The value assigned to the fitness_batch_size parameter is expected to be integer but the value ({fitness_batch_size}) of type {fitness_batch_size_type} found.".format(fitness_batch_size=fitness_batch_size, 
fitness_batch_size_type=type(fitness_batch_size))) + elif fitness_batch_size <= 0 or fitness_batch_size > self.sol_per_pop: + self.valid_parameters = False + self.logger.error("The value assigned to the fitness_batch_size parameter must be:\n1) Greater than 0.\n2) Less than or equal to sol_per_pop ({sol_per_pop}).\nBut the value ({fitness_batch_size}) found.".format(fitness_batch_size=fitness_batch_size, sol_per_pop=self.sol_per_pop)) + raise ValueError("The value assigned to the fitness_batch_size parameter must be:\n1) Greater than 0.\n2) Less than or equal to sol_per_pop ({sol_per_pop}).\nBut the value ({fitness_batch_size}) found.".format(fitness_batch_size=fitness_batch_size, sol_per_pop=self.sol_per_pop)) + + self.fitness_batch_size = fitness_batch_size + + # Check if the on_start exists. + if not (on_start is None): + if inspect.ismethod(on_start): + # Check if the on_start method accepts 2 paramaters. + if (on_start.__code__.co_argcount == 2): + self.on_start = on_start + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_start parameter must accept only 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) + raise ValueError("The method assigned to the on_start parameter must accept only 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) + # Check if the on_start is a function. + elif callable(on_start): + # Check if the on_start function accepts only a single paramater. 
+ if (on_start.__code__.co_argcount == 1): + self.on_start = on_start + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_start parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) + raise ValueError("The function assigned to the on_start parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_start parameter is expected to be of type function but {on_start_type} found.".format(on_start_type=type(on_start))) + raise TypeError("The value assigned to the on_start parameter is expected to be of type function but {on_start_type} found.".format(on_start_type=type(on_start))) + else: + self.on_start = None + + # Check if the on_fitness exists. + if not (on_fitness is None): + # Check if the on_fitness is a method. + if inspect.ismethod(on_fitness): + # Check if the on_fitness method accepts 3 paramaters. 
+ if (on_fitness.__code__.co_argcount == 3): + self.on_fitness = on_fitness + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_fitness parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) + raise ValueError("The method assigned to the on_fitness parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) + # Check if the on_fitness is a function. + elif callable(on_fitness): + # Check if the on_fitness function accepts 2 paramaters. + if (on_fitness.__code__.co_argcount == 2): + self.on_fitness = on_fitness + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_fitness parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) + raise ValueError("The function assigned to the on_fitness parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_fitness parameter is expected to be of type function but {on_fitness_type} 
found.".format(on_fitness_type=type(on_fitness))) + raise TypeError("The value assigned to the on_fitness parameter is expected to be of type function but {on_fitness_type} found.".format(on_fitness_type=type(on_fitness))) + else: + self.on_fitness = None + + # Check if the on_parents exists. + if not (on_parents is None): + # Check if the on_parents is a method. + if inspect.ismethod(on_parents): + # Check if the on_parents method accepts 3 paramaters. + if (on_parents.__code__.co_argcount == 3): + self.on_parents = on_parents + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_parents parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) + raise ValueError("The method assigned to the on_parents parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) + # Check if the on_parents is a function. + elif callable(on_parents): + # Check if the on_parents function accepts 2 paramaters. 
+ if (on_parents.__code__.co_argcount == 2): + self.on_parents = on_parents + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_parents parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) + raise ValueError("The function assigned to the on_parents parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_parents parameter is expected to be of type function but {on_parents_type} found.".format(on_parents_type=type(on_parents))) + raise TypeError("The value assigned to the on_parents parameter is expected to be of type function but {on_parents_type} found.".format(on_parents_type=type(on_parents))) + else: + self.on_parents = None + + # Check if the on_crossover exists. + if not (on_crossover is None): + # Check if the on_crossover is a method. + if inspect.ismethod(on_crossover): + # Check if the on_crossover method accepts 3 paramaters. 
+ if (on_crossover.__code__.co_argcount == 3): + self.on_crossover = on_crossover + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_crossover parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring generated using crossover.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) + raise ValueError("The method assigned to the on_crossover parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring generated using crossover.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) + # Check if the on_crossover is a function. + elif callable(on_crossover): + # Check if the on_crossover function accepts 2 paramaters. 
+ if (on_crossover.__code__.co_argcount == 2): + self.on_crossover = on_crossover + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_crossover parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring generated using crossover.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) + raise ValueError("The function assigned to the on_crossover parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring generated using crossover.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_crossover parameter is expected to be of type function but {on_crossover_type} found.".format(on_crossover_type=type(on_crossover))) + raise TypeError("The value assigned to the on_crossover parameter is expected to be of type function but {on_crossover_type} found.".format(on_crossover_type=type(on_crossover))) + else: + self.on_crossover = None + + # Check if the on_mutation exists. + if not (on_mutation is None): + # Check if the on_mutation is a method. + if inspect.ismethod(on_mutation): + # Check if the on_mutation method accepts 3 paramaters. 
+ if (on_mutation.__code__.co_argcount == 3): + self.on_mutation = on_mutation + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_mutation parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring after applying the mutation operation.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) + raise ValueError("The method assigned to the on_mutation parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring after applying the mutation operation.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) + # Check if the on_mutation is a function. + elif callable(on_mutation): + # Check if the on_mutation function accepts 2 paramaters. 
+ if (on_mutation.__code__.co_argcount == 2): + self.on_mutation = on_mutation + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_mutation parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring after applying the mutation operation.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) + raise ValueError("The function assigned to the on_mutation parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring after applying the mutation operation.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_mutation parameter is expected to be of type function but {on_mutation_type} found.".format(on_mutation_type=type(on_mutation))) + raise TypeError("The value assigned to the on_mutation parameter is expected to be of type function but {on_mutation_type} found.".format(on_mutation_type=type(on_mutation))) + else: + self.on_mutation = None + + # Check if the on_generation exists. + if not (on_generation is None): + # Check if the on_generation is a method. + if inspect.ismethod(on_generation): + # Check if the on_generation method accepts 2 paramaters. 
+ if (on_generation.__code__.co_argcount == 2): + self.on_generation = on_generation + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_generation parameter must accept 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) + raise ValueError("The method assigned to the on_generation parameter must accept 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) + # Check if the on_generation is a function. + elif callable(on_generation): + # Check if the on_generation function accepts only a single paramater. + if (on_generation.__code__.co_argcount == 1): + self.on_generation = on_generation + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_generation parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) + raise ValueError("The function assigned to the on_generation parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the on_generation parameter is expected to be of type function but {on_generation_type} found.".format(on_generation_type=type(on_generation))) + raise TypeError("The value assigned 
to the on_generation parameter is expected to be of type function but {on_generation_type} found.".format(on_generation_type=type(on_generation))) + else: + self.on_generation = None + + # Check if the on_stop exists. + if not (on_stop is None): + # Check if the on_stop is a method. + if inspect.ismethod(on_stop): + # Check if the on_stop method accepts 3 paramaters. + if (on_stop.__code__.co_argcount == 3): + self.on_stop = on_stop + else: + self.valid_parameters = False + self.logger.error("The method assigned to the on_stop parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) A list of the fitness values of the solutions in the last population.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) + raise ValueError("The method assigned to the on_stop parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) A list of the fitness values of the solutions in the last population.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) + # Check if the on_stop is a function. + elif callable(on_stop): + # Check if the on_stop function accepts 2 paramaters. 
+ if (on_stop.__code__.co_argcount == 2): + self.on_stop = on_stop + else: + self.valid_parameters = False + self.logger.error("The function assigned to the on_stop parameter must accept 2 parameters representing the instance of the genetic algorithm and a list of the fitness values of the solutions in the last population.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) + raise ValueError("The function assigned to the on_stop parameter must accept 2 parameters representing the instance of the genetic algorithm and a list of the fitness values of the solutions in the last population.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) + else: + self.valid_parameters = False + self.logger.error("The value assigned to the 'on_stop' parameter is expected to be of type function but {on_stop_type} found.".format(on_stop_type=type(on_stop))) + raise TypeError("The value assigned to the 'on_stop' parameter is expected to be of type function but {on_stop_type} found.".format(on_stop_type=type(on_stop))) + else: + self.on_stop = None + + # Validate delay_after_gen + if type(delay_after_gen) in GA.supported_int_float_types: + if delay_after_gen >= 0.0: + self.delay_after_gen = delay_after_gen + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'delay_after_gen' parameter must be a non-negative number. The value passed is ({delay_after_gen}) of type {delay_after_gen_type}.".format(delay_after_gen=delay_after_gen, delay_after_gen_type=type(delay_after_gen))) + raise ValueError("The value passed to the 'delay_after_gen' parameter must be a non-negative number. 
The value passed is ({delay_after_gen}) of type {delay_after_gen_type}.".format(delay_after_gen=delay_after_gen, delay_after_gen_type=type(delay_after_gen))) + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'delay_after_gen' parameter must be of type int or float but {delay_after_gen_type} found.".format(delay_after_gen_type=type(delay_after_gen))) + raise TypeError("The value passed to the 'delay_after_gen' parameter must be of type int or float but {delay_after_gen_type} found.".format(delay_after_gen_type=type(delay_after_gen))) + + # Validate save_best_solutions + if type(save_best_solutions) is bool: + if save_best_solutions == True: + if not self.suppress_warnings: warnings.warn("Use the 'save_best_solutions' parameter with caution as it may cause memory overflow when either the number of generations or number of genes is large.") + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'save_best_solutions' parameter must be of type bool but {save_best_solutions_type} found.".format(save_best_solutions_type=type(save_best_solutions))) + raise TypeError("The value passed to the 'save_best_solutions' parameter must be of type bool but {save_best_solutions_type} found.".format(save_best_solutions_type=type(save_best_solutions))) + + # Validate save_solutions + if type(save_solutions) is bool: + if save_solutions == True: + if not self.suppress_warnings: warnings.warn("Use the 'save_solutions' parameter with caution as it may cause memory overflow when either the number of generations, number of genes, or number of solutions in population is large.") + else: + self.valid_parameters = False + self.logger.error("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} found.".format(save_solutions_type=type(save_solutions))) + raise TypeError("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} 
found.".format(save_solutions_type=type(save_solutions))) + + # Validate allow_duplicate_genes + if not (type(allow_duplicate_genes) is bool): + self.valid_parameters = False + self.logger.error("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) + raise TypeError("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) + + self.allow_duplicate_genes = allow_duplicate_genes + + self.stop_criteria = [] + self.supported_stop_words = ["reach", "saturate"] + if stop_criteria is None: + # None: Stop after passing through all generations. + self.stop_criteria = None + elif type(stop_criteria) is str: + # reach_{target_fitness}: Stop if the target fitness value is reached. + # saturate_{num_generations}: Stop if the fitness value does not change (saturates) for the given number of generations. 
+ criterion = stop_criteria.split("_") + if len(criterion) == 2: + stop_word = criterion[0] + number = criterion[1] + + if stop_word in self.supported_stop_words: + pass + else: + self.valid_parameters = False + self.logger.error("In the 'stop_criteria' parameter, the supported stop words are '{supported_stop_words}' but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) + raise ValueError("In the 'stop_criteria' parameter, the supported stop words are '{supported_stop_words}' but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) + + if number.replace(".", "").isnumeric(): + number = float(number) + else: + self.valid_parameters = False + self.logger.error("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) + raise ValueError("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) + + self.stop_criteria.append([stop_word, number]) + + else: + self.valid_parameters = False + self.logger.error("For format of a single criterion in the 'stop_criteria' parameter is 'word_number' but '{stop_criteria}' found.".format(stop_criteria=stop_criteria)) + raise ValueError("For format of a single criterion in the 'stop_criteria' parameter is 'word_number' but '{stop_criteria}' found.".format(stop_criteria=stop_criteria)) + + elif type(stop_criteria) in [list, tuple, numpy.ndarray]: + # Remove duplicate criterira by converting the list to a set then back to a list. 
+ stop_criteria = list(set(stop_criteria)) + for idx, val in enumerate(stop_criteria): + if type(val) is str: + criterion = val.split("_") + if len(criterion) == 2: + stop_word = criterion[0] + number = criterion[1] + + if stop_word in self.supported_stop_words: + pass + else: + self.valid_parameters = False + self.logger.error("In the 'stop_criteria' parameter, the supported stop words are {supported_stop_words} but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) + raise ValueError("In the 'stop_criteria' parameter, the supported stop words are {supported_stop_words} but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) + + if number.replace(".", "").isnumeric(): + number = float(number) + else: + self.valid_parameters = False + self.logger.error("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) + raise ValueError("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) + + self.stop_criteria.append([stop_word, number]) + + else: + self.valid_parameters = False + self.logger.error("The format of a single criterion in the 'stop_criteria' parameter is 'word_number' but {stop_criteria} found.".format(stop_criteria=criterion)) + raise ValueError("The format of a single criterion in the 'stop_criteria' parameter is 'word_number' but {stop_criteria} found.".format(stop_criteria=criterion)) + else: + self.valid_parameters = False + self.logger.error("When the 'stop_criteria' parameter is assigned a tuple/list/numpy.ndarray, then its elements must be strings but the value ({stop_criteria_val}) of type {stop_criteria_val_type} found at index 
{stop_criteria_val_idx}.".format(stop_criteria_val=val, stop_criteria_val_type=type(val), stop_criteria_val_idx=idx)) + raise TypeError("When the 'stop_criteria' parameter is assigned a tuple/list/numpy.ndarray, then its elements must be strings but the value ({stop_criteria_val}) of type {stop_criteria_val_type} found at index {stop_criteria_val_idx}.".format(stop_criteria_val=val, stop_criteria_val_type=type(val), stop_criteria_val_idx=idx)) + else: + self.valid_parameters = False + self.logger.error("The expected value of the 'stop_criteria' is a single string or a list/tuple/numpy.ndarray of strings but the value ({stop_criteria_val}) of type {stop_criteria_type} found.".format(stop_criteria_val=stop_criteria, stop_criteria_type=type(stop_criteria))) + raise TypeError("The expected value of the 'stop_criteria' is a single string or a list/tuple/numpy.ndarray of strings but the value ({stop_criteria_val}) of type {stop_criteria_type} found.".format(stop_criteria_val=stop_criteria, stop_criteria_type=type(stop_criteria))) + + if parallel_processing is None: + self.parallel_processing = None + elif type(parallel_processing) in GA.supported_int_types: + if parallel_processing > 0: + self.parallel_processing = ["thread", parallel_processing] + else: + self.valid_parameters = False + self.logger.error("When the 'parallel_processing' parameter is assigned an integer, then the integer must be positive but the value ({parallel_processing_value}) found.".format(parallel_processing_value=parallel_processing)) + raise ValueError("When the 'parallel_processing' parameter is assigned an integer, then the integer must be positive but the value ({parallel_processing_value}) found.".format(parallel_processing_value=parallel_processing)) + elif type(parallel_processing) in [list, tuple]: + if len(parallel_processing) == 2: + if type(parallel_processing[0]) is str: + if parallel_processing[0] in ["process", "thread"]: + if (type(parallel_processing[1]) in GA.supported_int_types 
and parallel_processing[1] > 0) or (parallel_processing[1] == 0) or (parallel_processing[1] is None): + if parallel_processing[1] == 0: + # If the number of processes/threads is 0, this means no parallel processing is used. It is equivelant to setting parallel_processing=None. + self.parallel_processing = None + else: + # Whether the second value is None or a positive integer. + self.parallel_processing = parallel_processing + else: + self.valid_parameters = False + self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the second element must be an integer but the value ({second_value}) of type {second_value_type} found.".format(second_value=parallel_processing[1], second_value_type=type(parallel_processing[1]))) + raise TypeError("When a list or tuple is assigned to the 'parallel_processing' parameter, then the second element must be an integer but the value ({second_value}) of type {second_value_type} found.".format(second_value=parallel_processing[1], second_value_type=type(parallel_processing[1]))) + else: + self.valid_parameters = False + self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the value of the first element must be either 'process' or 'thread' but the value ({first_value}) found.".format(first_value=parallel_processing[0])) + raise ValueError("When a list or tuple is assigned to the 'parallel_processing' parameter, then the value of the first element must be either 'process' or 'thread' but the value ({first_value}) found.".format(first_value=parallel_processing[0])) + else: + self.valid_parameters = False + self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the first element must be of type 'str' but the value ({first_value}) of type {first_value_type} found.".format(first_value=parallel_processing[0], first_value_type=type(parallel_processing[0]))) + raise TypeError("When a list or tuple is assigned to the 
'parallel_processing' parameter, then the first element must be of type 'str' but the value ({first_value}) of type {first_value_type} found.".format(first_value=parallel_processing[0], first_value_type=type(parallel_processing[0]))) + else: + self.valid_parameters = False + self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then it must have 2 elements but ({num_elements}) found.".format(num_elements=len(parallel_processing))) + raise ValueError("When a list or tuple is assigned to the 'parallel_processing' parameter, then it must have 2 elements but ({num_elements}) found.".format(num_elements=len(parallel_processing))) + else: + self.valid_parameters = False + self.logger.error("Unexpected value ({parallel_processing_value}) of type ({parallel_processing_type}) assigned to the 'parallel_processing' parameter. The accepted values for this parameter are:\n1) None: (Default) It means no parallel processing is used.\n2) A positive integer referring to the number of threads to be used (i.e. threads, not processes, are used.\n3) list/tuple: If a list or a tuple of exactly 2 elements is assigned, then:\n\t*1) The first element can be either 'process' or 'thread' to specify whether processes or threads are used, respectively.\n\t*2) The second element can be:\n\t\t**1) A positive integer to select the maximum number of processes or threads to be used.\n\t\t**2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'.\n\t\t**3) None to use the default value as calculated by the concurrent.futures module.".format(parallel_processing_value=parallel_processing, parallel_processing_type=type(parallel_processing))) + raise ValueError("Unexpected value ({parallel_processing_value}) of type ({parallel_processing_type}) assigned to the 'parallel_processing' parameter. 
The accepted values for this parameter are:\n1) None: (Default) It means no parallel processing is used.\n2) A positive integer referring to the number of threads to be used (i.e. threads, not processes, are used.\n3) list/tuple: If a list or a tuple of exactly 2 elements is assigned, then:\n\t*1) The first element can be either 'process' or 'thread' to specify whether processes or threads are used, respectively.\n\t*2) The second element can be:\n\t\t**1) A positive integer to select the maximum number of processes or threads to be used.\n\t\t**2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'.\n\t\t**3) None to use the default value as calculated by the concurrent.futures module.".format(parallel_processing_value=parallel_processing, parallel_processing_type=type(parallel_processing))) + + # Set the `run_completed` property to False. It is set to `True` only after the `run()` method is complete. + self.run_completed = False + + # The number of completed generations. + self.generations_completed = 0 + + # At this point, all necessary parameters validation is done successfully and we are sure that the parameters are valid. + self.valid_parameters = True # Set to True when all the parameters passed in the GA class constructor are valid. + + # Parameters of the genetic algorithm. + self.num_generations = abs(num_generations) + self.parent_selection_type = parent_selection_type + + # Parameters of the mutation operation. + self.mutation_percent_genes = mutation_percent_genes + self.mutation_num_genes = mutation_num_genes + + # Even such this parameter is declared in the class header, it is assigned to the object here to access it after saving the object. + self.best_solutions_fitness = [] # A list holding the fitness value of the best solution for each generation. + + self.best_solution_generation = -1 # The generation number at which the best fitness value is reached. 
It is only assigned the generation number after the `run()` method completes. Otherwise, its value is -1. + + self.save_best_solutions = save_best_solutions + self.best_solutions = [] # Holds the best solution in each generation. + + self.save_solutions = save_solutions + self.solutions = [] # Holds the solutions in each generation. + self.solutions_fitness = [] # Holds the fitness of the solutions in each generation. + + self.last_generation_fitness = None # A list holding the fitness values of all solutions in the last generation. + self.last_generation_parents = None # A list holding the parents of the last generation. + self.last_generation_offspring_crossover = None # A list holding the offspring after applying crossover in the last generation. + self.last_generation_offspring_mutation = None # A list holding the offspring after applying mutation in the last generation. + self.previous_generation_fitness = None # Holds the fitness values of one generation before the fitness values saved in the last_generation_fitness attribute. Added in PyGAD 2.16.2. + self.last_generation_elitism = None # Added in PyGAD 2.18.0. A NumPy array holding the elitism of the current generation according to the value passed in the 'keep_elitism' parameter. It works only if the 'keep_elitism' parameter has a non-zero value. + self.last_generation_elitism_indices = None # Added in PyGAD 2.19.0. A NumPy array holding the indices of the elitism of the current generation. It works only if the 'keep_elitism' parameter has a non-zero value. 
def round_genes(self, solutions):

    """
    Rounds the gene values of the passed solutions according to the precision stored in the 'gene_type' attribute.

    solutions: An array indexed as solutions[:, gene_idx], where each column holds the values of one gene. It is modified in place.

    When 'gene_type_single' is true, the precision is read from self.gene_type[1] and applied to every gene.
    Otherwise, each gene uses its own entry self.gene_type[gene_idx], where the column is first converted to the data type at index 0 and then rounded to the precision at index 1.
    A gene whose precision is None is left untouched.

    Returns the same 'solutions' object after rounding.
    """

    for column_idx in range(self.num_genes):
        if self.gene_type_single:
            # A single [type, precision] pair is shared by all genes.
            precision = self.gene_type[1]
            if precision is None:
                continue
            column_values = solutions[:, column_idx]
        else:
            # Each gene has its own [type, precision] pair.
            gene_spec = self.gene_type[column_idx]
            precision = gene_spec[1]
            if precision is None:
                continue
            # Cast the column to the gene's data type before rounding it.
            column_values = numpy.asarray(solutions[:, column_idx],
                                          dtype=gene_spec[0])

        solutions[:, column_idx] = numpy.round(column_values, precision)

    return solutions
+ self.population = numpy.zeros(shape=self.pop_size, dtype=object) + # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. + for gene_idx in range(self.num_genes): + # A vector of all values of this single gene across all solutions in the population. + gene_values = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=self.pop_size[0]), + dtype=self.gene_type[gene_idx][0]) + # Adding the current gene values to the population. + self.population[:, gene_idx] = gene_values + + if allow_duplicate_genes == False: + for solution_idx in range(self.population.shape[0]): + # self.logger.info("Before", self.population[solution_idx]) + self.population[solution_idx], _, _ = self.solve_duplicate_genes_randomly(solution=self.population[solution_idx], + min_val=low, + max_val=high, + mutation_by_replacement=True, + gene_type=gene_type, + num_trials=10) + # self.logger.info("After", self.population[solution_idx]) + + elif self.gene_space_nested: + if self.gene_type_single == True: + self.population = numpy.zeros(shape=self.pop_size, dtype=self.gene_type[0]) + for sol_idx in range(self.sol_per_pop): + for gene_idx in range(self.num_genes): + if type(self.gene_space[gene_idx]) in [list, tuple, range]: + # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. 
+ if type(self.gene_space[gene_idx]) is range: + temp = self.gene_space[gene_idx] + else: + temp = self.gene_space[gene_idx].copy() + for idx, val in enumerate(self.gene_space[gene_idx]): + if val is None: + self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[0])[0] + self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) + self.population[sol_idx, gene_idx] = self.gene_type[0](self.population[sol_idx, gene_idx]) + self.gene_space[gene_idx] = temp + elif type(self.gene_space[gene_idx]) is dict: + if 'step' in self.gene_space[gene_idx].keys(): + self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], + stop=self.gene_space[gene_idx]['high'], + step=self.gene_space[gene_idx]['step']), + size=1), + dtype=self.gene_type[0])[0] + else: + self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=self.gene_space[gene_idx]['low'], + high=self.gene_space[gene_idx]['high'], + size=1), + dtype=self.gene_type[0])[0] + elif type(self.gene_space[gene_idx]) == type(None): + + # The following commented code replace the None value with a single number that will not change again. + # This means the gene value will be the same across all solutions. + # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + # high=high, + # size=1), dtype=self.gene_type[0])[0] + # self.population[sol_idx, gene_idx] = self.gene_space[gene_idx].copy() + + # The above problem is solved by keeping the None value in the gene_space parameter. This forces PyGAD to generate this value for each solution. 
+ self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[0])[0] + elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: + self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] + else: + self.population = numpy.zeros(shape=self.pop_size, dtype=object) + for sol_idx in range(self.sol_per_pop): + for gene_idx in range(self.num_genes): + if type(self.gene_space[gene_idx]) in [list, tuple, range]: + # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. + temp = self.gene_space[gene_idx].copy() + for idx, val in enumerate(self.gene_space[gene_idx]): + if val is None: + self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[gene_idx][0])[0] + self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) + self.population[sol_idx, gene_idx] = self.gene_type[gene_idx][0](self.population[sol_idx, gene_idx]) + self.gene_space[gene_idx] = temp.copy() + elif type(self.gene_space[gene_idx]) is dict: + if 'step' in self.gene_space[gene_idx].keys(): + self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], + stop=self.gene_space[gene_idx]['high'], + step=self.gene_space[gene_idx]['step']), + size=1), + dtype=self.gene_type[gene_idx][0])[0] + else: + self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=self.gene_space[gene_idx]['low'], + high=self.gene_space[gene_idx]['high'], + size=1), + dtype=self.gene_type[gene_idx][0])[0] + elif type(self.gene_space[gene_idx]) == type(None): + # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + # high=high, + # size=1), + # dtype=self.gene_type[gene_idx][0])[0] + + # self.population[sol_idx, gene_idx] = 
self.gene_space[gene_idx].copy() + + temp = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[gene_idx][0])[0] + self.population[sol_idx, gene_idx] = temp + elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: + self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] + else: + if self.gene_type_single == True: + # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. + for idx, curr_gene_space in enumerate(self.gene_space): + if curr_gene_space is None: + self.gene_space[idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[0])[0] + + # Creating the initial population by randomly selecting the genes' values from the values inside the 'gene_space' parameter. + if type(self.gene_space) is dict: + if 'step' in self.gene_space.keys(): + self.population = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']), + size=self.pop_size), + dtype=self.gene_type[0]) + else: + self.population = numpy.asarray(numpy.random.uniform(low=self.gene_space['low'], + high=self.gene_space['high'], + size=self.pop_size), + dtype=self.gene_type[0]) # A NumPy array holding the initial population. + else: + self.population = numpy.asarray(numpy.random.choice(self.gene_space, + size=self.pop_size), + dtype=self.gene_type[0]) # A NumPy array holding the initial population. + else: + # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. 
+ for gene_idx, curr_gene_space in enumerate(self.gene_space): + if curr_gene_space is None: + self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[gene_idx][0])[0] + + # Creating the initial population by randomly selecting the genes' values from the values inside the 'gene_space' parameter. + if type(self.gene_space) is dict: + # Create an empty population of dtype=object to support storing mixed data types within the same array. + self.population = numpy.zeros(shape=self.pop_size, dtype=object) + # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. + for gene_idx in range(self.num_genes): + # A vector of all values of this single gene across all solutions in the population. + if 'step' in self.gene_space[gene_idx].keys(): + gene_values = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], + stop=self.gene_space[gene_idx]['high'], + step=self.gene_space[gene_idx]['step']), + size=self.pop_size[0]), + dtype=self.gene_type[gene_idx][0]) + else: + gene_values = numpy.asarray(numpy.random.uniform(low=self.gene_space['low'], + high=self.gene_space['high'], + size=self.pop_size[0]), + dtype=self.gene_type[gene_idx][0]) + # Adding the current gene values to the population. + self.population[:, gene_idx] = gene_values + + else: + # Create an empty population of dtype=object to support storing mixed data types within the same array. + self.population = numpy.zeros(shape=self.pop_size, dtype=object) + # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. + for gene_idx in range(self.num_genes): + # A vector of all values of this single gene across all solutions in the population. 
+ gene_values = numpy.asarray(numpy.random.choice(self.gene_space, + size=self.pop_size[0]), + dtype=self.gene_type[gene_idx][0]) + # Adding the current gene values to the population. + self.population[:, gene_idx] = gene_values + + if not (self.gene_space is None): + if allow_duplicate_genes == False: + for sol_idx in range(self.population.shape[0]): + self.population[sol_idx], _, _ = self.solve_duplicate_genes_by_space(solution=self.population[sol_idx], + gene_type=self.gene_type, + num_trials=10, + build_initial_pop=True) + + # Keeping the initial population in the initial_population attribute. + self.initial_population = self.population.copy() + + def cal_pop_fitness(self): + + """ + Calculating the fitness values of batches of solutions in the current population. + It returns: + -fitness: An array of the calculated fitness values. + """ + + if self.valid_parameters == False: + self.logger.error("ERROR calling the cal_pop_fitness() method: \nPlease check the parameters passed while creating an instance of the GA class.\n") + raise Exception("ERROR calling the cal_pop_fitness() method: \nPlease check the parameters passed while creating an instance of the GA class.\n") + + # 'last_generation_parents_as_list' is the list version of 'self.last_generation_parents' + # It is used to return the parent index using the 'in' membership operator of Python lists. This is much faster than using 'numpy.where()'. + if self.last_generation_parents is not None: + last_generation_parents_as_list = [list(gen_parent) for gen_parent in self.last_generation_parents] + + # 'last_generation_elitism_as_list' is the list version of 'self.last_generation_elitism' + # It is used to return the elitism index using the 'in' membership operator of Python lists. This is much faster than using 'numpy.where()'. 
+ if self.last_generation_elitism is not None: + last_generation_elitism_as_list = [list(gen_elitism) for gen_elitism in self.last_generation_elitism] + + pop_fitness = ["undefined"] * len(self.population) + if self.parallel_processing is None: + # Calculating the fitness value of each solution in the current population. + for sol_idx, sol in enumerate(self.population): + # Check if the `save_solutions` parameter is `True` and whether the solution already exists in the `solutions` list. If so, use its fitness rather than calculating it again. + # The functions numpy.any()/numpy.all()/numpy.where()/numpy.equal() are very slow. + # So, list membership operator 'in' is used to check if the solution exists in the 'self.solutions' list. + # Make sure that both the solution and 'self.solutions' are of type 'list' not 'numpy.ndarray'. + # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(self.solutions == numpy.array(sol), axis=1))): + # if (self.save_solutions) and (len(self.solutions) > 0) and (numpy.any(numpy.all(numpy.equal(self.solutions, numpy.array(sol)), axis=1))): + if (self.save_solutions) and (len(self.solutions) > 0) and (list(sol) in self.solutions): + solution_idx = self.solutions.index(list(sol)) + fitness = self.solutions_fitness[solution_idx] + elif (self.keep_elitism > 0) and (self.last_generation_elitism is not None) and (len(self.last_generation_elitism) > 0) and (list(sol) in last_generation_elitism_as_list): + # Return the index of the elitism from the elitism array 'self.last_generation_elitism'. + # This is not its index within the population. It is just its index in the 'self.last_generation_elitism' array. + elitism_idx = last_generation_elitism_as_list.index(list(sol)) + # Use the returned elitism index to return its index in the last population. + elitism_idx = self.last_generation_elitism_indices[elitism_idx] + # Use the elitism's index to return its pre-calculated fitness value. 
+ fitness = self.previous_generation_fitness[elitism_idx] + # If the solutions are not saved (i.e. `save_solutions=False`), check if this solution is a parent from the previous generation and its fitness value is already calculated. If so, use the fitness value instead of calling the fitness function. + # We cannot use the `numpy.where()` function directly because it does not support the `axis` parameter. This is why the `numpy.all()` function is used to match the solutions on axis=1. + # elif (self.last_generation_parents is not None) and len(numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0] > 0): + elif ((self.keep_parents == -1) or (self.keep_parents > 0)) and (self.last_generation_parents is not None) and (len(self.last_generation_parents) > 0) and (list(sol) in last_generation_parents_as_list): + # Index of the parent in the 'self.last_generation_parents' array. + # This is not its index within the population. It is just its index in the 'self.last_generation_parents' array. + # parent_idx = numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0][0] + parent_idx = last_generation_parents_as_list.index(list(sol)) + # Use the returned parent index to return its index in the last population. + parent_idx = self.last_generation_parents_indices[parent_idx] + # Use the parent's index to return its pre-calculated fitness value. + fitness = self.previous_generation_fitness[parent_idx] + else: + # Check if batch processing is used. If not, then calculate this missing fitness value. 
+ if self.fitness_batch_size in [1, None]: + fitness = self.fitness_func(self, sol, sol_idx) + if type(fitness) in GA.supported_int_float_types: + pass + else: + self.logger.error("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + raise ValueError("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + else: + # Reaching this point means that batch processing is in effect to calculate the fitness values. + # Do not continue the loop as no fitness is calculated. The fitness will be calculated later in batch mode. + continue + + # This is only executed if the fitness value was already calculated. + pop_fitness[sol_idx] = fitness + + if self.fitness_batch_size not in [1, None]: + # Reaching this block means that batch fitness calculation is used. + + # Indices of the solutions to calculate their fitness. + solutions_indices = numpy.where(numpy.array(pop_fitness) == "undefined")[0] + # Number of batches. + num_batches = int(numpy.ceil(len(solutions_indices) / self.fitness_batch_size)) + # For each batch, get its indices and call the fitness function. 
+ for batch_idx in range(num_batches): + batch_first_index = batch_idx * self.fitness_batch_size + batch_last_index = (batch_idx + 1) * self.fitness_batch_size + batch_indices = solutions_indices[batch_first_index:batch_last_index] + batch_solutions = self.population[batch_indices, :] + + batch_fitness = self.fitness_func(self, batch_solutions, batch_indices) + if type(batch_fitness) not in [list, tuple, numpy.ndarray]: + self.logger.error("Expected to receive a list, tuple, or numpy.ndarray from the fitness function but the value ({batch_fitness}) of type {batch_fitness_type}.".format(batch_fitness=batch_fitness, batch_fitness_type=type(batch_fitness))) + raise TypeError("Expected to receive a list, tuple, or numpy.ndarray from the fitness function but the value ({batch_fitness}) of type {batch_fitness_type}.".format(batch_fitness=batch_fitness, batch_fitness_type=type(batch_fitness))) + elif len(numpy.array(batch_fitness)) != len(batch_indices): + self.logger.error("There is a mismatch between the number of solutions passed to the fitness function ({batch_indices_len}) and the number of fitness values returned ({batch_fitness_len}). They must match.".format(batch_fitness_len=len(batch_fitness), batch_indices_len=len(batch_indices))) + raise ValueError("There is a mismatch between the number of solutions passed to the fitness function ({batch_indices_len}) and the number of fitness values returned ({batch_fitness_len}). 
They must match.".format(batch_fitness_len=len(batch_fitness), batch_indices_len=len(batch_indices))) + + for index, fitness in zip(batch_indices, batch_fitness): + if type(fitness) in GA.supported_int_float_types: + pop_fitness[index] = fitness + else: + self.logger.error("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + raise ValueError("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + else: + # Calculating the fitness value of each solution in the current population. + for sol_idx, sol in enumerate(self.population): + # Check if the `save_solutions` parameter is `True` and whether the solution already exists in the `solutions` list. If so, use its fitness rather than calculating it again. + # The functions numpy.any()/numpy.all()/numpy.where()/numpy.equal() are very slow. + # So, list membership operator 'in' is used to check if the solution exists in the 'self.solutions' list. + # Make sure that both the solution and 'self.solutions' are of type 'list' not 'numpy.ndarray'. + if (self.save_solutions) and (len(self.solutions) > 0) and (list(sol) in self.solutions): + solution_idx = self.solutions.index(list(sol)) + fitness = self.solutions_fitness[solution_idx] + pop_fitness[sol_idx] = fitness + elif (self.keep_elitism > 0) and (self.last_generation_elitism is not None) and (len(self.last_generation_elitism) > 0) and (list(sol) in last_generation_elitism_as_list): + # Return the index of the elitism from the elitism array 'self.last_generation_elitism'. + # This is not its index within the population. It is just its index in the 'self.last_generation_elitism' array. + elitism_idx = last_generation_elitism_as_list.index(list(sol)) + # Use the returned elitism index to return its index in the last population. 
+ elitism_idx = self.last_generation_elitism_indices[elitism_idx] + # Use the elitism's index to return its pre-calculated fitness value. + fitness = self.previous_generation_fitness[elitism_idx] + + pop_fitness[sol_idx] = fitness + # If the solutions are not saved (i.e. `save_solutions=False`), check if this solution is a parent from the previous generation and its fitness value is already calculated. If so, use the fitness value instead of calling the fitness function. + # We cannot use the `numpy.where()` function directly because it does not support the `axis` parameter. This is why the `numpy.all()` function is used to match the solutions on axis=1. + # elif (self.last_generation_parents is not None) and len(numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0] > 0): + elif ((self.keep_parents == -1) or (self.keep_parents > 0)) and (self.last_generation_parents is not None) and (len(self.last_generation_parents) > 0) and (list(sol) in last_generation_parents_as_list): + # Index of the parent in the 'self.last_generation_parents' array. + # This is not its index within the population. It is just its index in the 'self.last_generation_parents' array. + # parent_idx = numpy.where(numpy.all(self.last_generation_parents == sol, axis=1))[0][0] + parent_idx = last_generation_parents_as_list.index(list(sol)) + # Use the returned parent index to return its index in the last population. + parent_idx = self.last_generation_parents_indices[parent_idx] + # Use the parent's index to return its pre-calculated fitness value. 
+ fitness = self.previous_generation_fitness[parent_idx] + + pop_fitness[sol_idx] = fitness + + # Decide which class to use based on whether the user selected "process" or "thread" + if self.parallel_processing[0] == "process": + ExecutorClass = concurrent.futures.ProcessPoolExecutor + else: + ExecutorClass = concurrent.futures.ThreadPoolExecutor + + # We can use a with statement to ensure threads are cleaned up promptly (https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example) + with ExecutorClass(max_workers=self.parallel_processing[1]) as executor: + solutions_to_submit_indices = [] + solutions_to_submit = [] + for sol_idx, sol in enumerate(self.population): + # The "undefined" value means that the fitness of this solution must be calculated. + if pop_fitness[sol_idx] == "undefined": + solutions_to_submit.append(sol.copy()) + solutions_to_submit_indices.append(sol_idx) + + # Check if batch processing is used. If not, then calculate the fitness value for individual solutions. + if self.fitness_batch_size in [1, None]: + for index, fitness in zip(solutions_to_submit_indices, executor.map(self.fitness_func, [self]*len(solutions_to_submit_indices), solutions_to_submit, solutions_to_submit_indices)): + if type(fitness) in GA.supported_int_float_types: + pop_fitness[index] = fitness + else: + self.logger.error("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + raise ValueError("The fitness function should return a number but the value {fit_val} of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + else: + # Reaching this block means that batch processing is used. The fitness values are calculated in batches. + + # Number of batches. + num_batches = int(numpy.ceil(len(solutions_to_submit_indices) / self.fitness_batch_size)) + # Each element of the `batches_solutions` list represents the solutions in one batch. 
+ batches_solutions = [] + # Each element of the `batches_indices` list represents the solutions' indices in one batch. + batches_indices = [] + # For each batch, get its indices and call the fitness function. + for batch_idx in range(num_batches): + batch_first_index = batch_idx * self.fitness_batch_size + batch_last_index = (batch_idx + 1) * self.fitness_batch_size + batch_indices = solutions_to_submit_indices[batch_first_index:batch_last_index] + batch_solutions = self.population[batch_indices, :] + + batches_solutions.append(batch_solutions) + batches_indices.append(batch_indices) + + for batch_indices, batch_fitness in zip(batches_indices, executor.map(self.fitness_func, [self]*len(solutions_to_submit_indices), batches_solutions, batches_indices)): + if type(batch_fitness) not in [list, tuple, numpy.ndarray]: + self.logger.error("Expected to receive a list, tuple, or numpy.ndarray from the fitness function but the value ({batch_fitness}) of type {batch_fitness_type}.".format(batch_fitness=batch_fitness, batch_fitness_type=type(batch_fitness))) + raise TypeError("Expected to receive a list, tuple, or numpy.ndarray from the fitness function but the value ({batch_fitness}) of type {batch_fitness_type}.".format(batch_fitness=batch_fitness, batch_fitness_type=type(batch_fitness))) + elif len(numpy.array(batch_fitness)) != len(batch_indices): + self.logger.error("There is a mismatch between the number of solutions passed to the fitness function ({batch_indices_len}) and the number of fitness values returned ({batch_fitness_len}). They must match.".format(batch_fitness_len=len(batch_fitness), batch_indices_len=len(batch_indices))) + raise ValueError("There is a mismatch between the number of solutions passed to the fitness function ({batch_indices_len}) and the number of fitness values returned ({batch_fitness_len}). 
They must match.".format(batch_fitness_len=len(batch_fitness), batch_indices_len=len(batch_indices))) + + for index, fitness in zip(batch_indices, batch_fitness): + if type(fitness) in GA.supported_int_float_types: + pop_fitness[index] = fitness + else: + self.logger.error("The fitness function should return a number but the value ({fit_val}) of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + raise ValueError("The fitness function should return a number but the value ({fit_val}) of type {fit_type} found.".format(fit_val=fitness, fit_type=type(fitness))) + + pop_fitness = numpy.array(pop_fitness) + + return pop_fitness + + def run(self): + + """ + Runs the genetic algorithm. This is the main method in which the genetic algorithm is evolved through a number of generations. + """ + + if self.valid_parameters == False: + self.logger.error("Error calling the run() method: \nThe run() method cannot be executed with invalid parameters. Please check the parameters passed while creating an instance of the GA class.\n") + raise Exception("Error calling the run() method: \nThe run() method cannot be executed with invalid parameters. Please check the parameters passed while creating an instance of the GA class.\n") + + # Starting from PyGAD 2.18.0, the 4 properties (best_solutions, best_solutions_fitness, solutions, and solutions_fitness) are no longer reset with each call to the run() method. Instead, they are extended. + # For example, if there are 50 generations and the user set save_best_solutions=True, then the length of the 2 properties best_solutions and best_solutions_fitness will be 50 after the first call to the run() method, then 100 after the second call, 150 after the third, and so on. + + # self.best_solutions: Holds the best solution in each generation. 
+ if type(self.best_solutions) is numpy.ndarray: + self.best_solutions = list(self.best_solutions) + # self.best_solutions_fitness: A list holding the fitness value of the best solution for each generation. + if type(self.best_solutions_fitness) is numpy.ndarray: + self.best_solutions_fitness = list(self.best_solutions_fitness) + # self.solutions: Holds the solutions in each generation. + if type(self.solutions) is numpy.ndarray: + self.solutions = list(self.solutions) + # self.solutions_fitness: Holds the fitness of the solutions in each generation. + if type(self.solutions_fitness) is numpy.ndarray: + self.solutions_fitness = list(self.solutions_fitness) + + if not (self.on_start is None): + self.on_start(self) + + stop_run = False + + # To continue from where we stopped, the first generation index should start from the value of the 'self.generations_completed' parameter. + if self.generations_completed != 0 and type(self.generations_completed) in GA.supported_int_types: + # If the 'self.generations_completed' parameter is not '0', then this means we continue execution. + generation_first_idx = self.generations_completed + generation_last_idx = self.num_generations + self.generations_completed + else: + # If the 'self.generations_completed' parameter is '0', then stat from scratch. + generation_first_idx = 0 + generation_last_idx = self.num_generations + + # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute. + self.last_generation_fitness = self.cal_pop_fitness() + + best_solution, best_solution_fitness, best_match_idx = self.best_solution(pop_fitness=self.last_generation_fitness) + + # Appending the best solution in the initial population to the best_solutions list. 
+ if self.save_best_solutions: + self.best_solutions.append(best_solution) + + for generation in range(generation_first_idx, generation_last_idx): + if not (self.on_fitness is None): + self.on_fitness(self, self.last_generation_fitness) + + # Appending the fitness value of the best solution in the current generation to the best_solutions_fitness attribute. + self.best_solutions_fitness.append(best_solution_fitness) + + # Appending the solutions in the current generation to the solutions list. + if self.save_solutions: + # self.solutions.extend(self.population.copy()) + population_as_list = self.population.copy() + population_as_list = [list(item) for item in population_as_list] + self.solutions.extend(population_as_list) + + self.solutions_fitness.extend(self.last_generation_fitness) + + # Selecting the best parents in the population for mating. + if callable(self.parent_selection_type): + self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self, + self.last_generation_fitness, + self.num_parents_mating, self) + if not type(self.last_generation_parents) is numpy.ndarray: + self.logger.error("The type of the iterable holding the selected parents is expected to be (numpy.ndarray) but {last_generation_parents_type} found.".format(last_generation_parents_type=type(self.last_generation_parents))) + raise TypeError("The type of the iterable holding the selected parents is expected to be (numpy.ndarray) but {last_generation_parents_type} found.".format(last_generation_parents_type=type(self.last_generation_parents))) + if not type(self.last_generation_parents_indices) is numpy.ndarray: + self.logger.error("The type of the iterable holding the selected parents' indices is expected to be (numpy.ndarray) but {last_generation_parents_indices_type} found.".format(last_generation_parents_indices_type=type(self.last_generation_parents_indices))) + raise TypeError("The type of the iterable holding the selected parents' indices is expected to 
be (numpy.ndarray) but {last_generation_parents_indices_type} found.".format(last_generation_parents_indices_type=type(self.last_generation_parents_indices))) + else: + self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self.last_generation_fitness, + num_parents=self.num_parents_mating) + + # Validate the output of the parent selection step: self.select_parents() + if self.last_generation_parents.shape != (self.num_parents_mating, self.num_genes): + if self.last_generation_parents.shape[0] != self.num_parents_mating: + self.logger.error("Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. It is expected to select ({num_parents_mating}) parents but ({num_parents_mating_selected}) selected.".format(parents_size_actual=self.last_generation_parents.shape, parents_size_expected=(self.num_parents_mating, self.num_genes), num_parents_mating=self.num_parents_mating, num_parents_mating_selected=self.last_generation_parents.shape[0])) + raise ValueError("Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. It is expected to select ({num_parents_mating}) parents but ({num_parents_mating_selected}) selected.".format(parents_size_actual=self.last_generation_parents.shape, parents_size_expected=(self.num_parents_mating, self.num_genes), num_parents_mating=self.num_parents_mating, num_parents_mating_selected=self.last_generation_parents.shape[0])) + elif self.last_generation_parents.shape[1] != self.num_genes: + self.logger.error("Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. 
Parents are expected to have ({num_genes}) genes but ({num_genes_selected}) produced.".format(parents_size_actual=self.last_generation_parents.shape, parents_size_expected=(self.num_parents_mating, self.num_genes), num_genes=self.num_genes, num_genes_selected=self.last_generation_parents.shape[1])) + raise ValueError("Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. Parents are expected to have ({num_genes}) genes but ({num_genes_selected}) produced.".format(parents_size_actual=self.last_generation_parents.shape, parents_size_expected=(self.num_parents_mating, self.num_genes), num_genes=self.num_genes, num_genes_selected=self.last_generation_parents.shape[1])) + + if self.last_generation_parents_indices.ndim != 1: + self.logger.error("The iterable holding the selected parents indices is expected to have 1 dimension but ({parents_indices_ndim}) found.".format(parents_indices_ndim=len(self.last_generation_parents_indices))) + raise ValueError("The iterable holding the selected parents indices is expected to have 1 dimension but ({parents_indices_ndim}) found.".format(parents_indices_ndim=len(self.last_generation_parents_indices))) + elif len(self.last_generation_parents_indices) != self.num_parents_mating: + self.logger.error("The iterable holding the selected parents indices is expected to have ({num_parents_mating}) values but ({num_parents_mating_selected}) found.".format(num_parents_mating=self.num_parents_mating, num_parents_mating_selected=len(self.last_generation_parents_indices))) + raise ValueError("The iterable holding the selected parents indices is expected to have ({num_parents_mating}) values but ({num_parents_mating_selected}) found.".format(num_parents_mating=self.num_parents_mating, num_parents_mating_selected=len(self.last_generation_parents_indices))) + + if not (self.on_parents is None): + self.on_parents(self, self.last_generation_parents) + + # If 
self.crossover_type=None, then no crossover is applied and thus no offspring will be created in the next generations. The next generation will use the solutions in the current population. + if self.crossover_type is None: + if self.keep_elitism == 0: + num_parents_to_keep = self.num_parents_mating if self.keep_parents == -1 else self.keep_parents + if self.num_offspring <= num_parents_to_keep: + self.last_generation_offspring_crossover = self.last_generation_parents[0:self.num_offspring] + else: + self.last_generation_offspring_crossover = numpy.concatenate((self.last_generation_parents, self.population[0:(self.num_offspring - self.last_generation_parents.shape[0])])) + else: + # The steady_state_selection() function is called to select the best solutions (i.e. elitism). The keep_elitism parameter defines the number of these solutions. + # The steady_state_selection() function is still called here even if its output may not be used given that the condition of the next if statement is True. The reason is that it will be used later. + self.last_generation_elitism, _ = self.steady_state_selection(self.last_generation_fitness, + num_parents=self.keep_elitism) + if self.num_offspring <= self.keep_elitism: + self.last_generation_offspring_crossover = self.last_generation_parents[0:self.num_offspring] + else: + self.last_generation_offspring_crossover = numpy.concatenate((self.last_generation_elitism, self.population[0:(self.num_offspring - self.last_generation_elitism.shape[0])])) + else: + # Generating offspring using crossover. 
+ if callable(self.crossover_type): + self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents, + (self.num_offspring, self.num_genes), + self) + if not type(self.last_generation_offspring_crossover) is numpy.ndarray: + self.logger.error("The output of the crossover step is expected to be of type (numpy.ndarray) but {last_generation_offspring_crossover_type} found.".format(last_generation_offspring_crossover_type=type(self.last_generation_offspring_crossover))) + raise TypeError("The output of the crossover step is expected to be of type (numpy.ndarray) but {last_generation_offspring_crossover_type} found.".format(last_generation_offspring_crossover_type=type(self.last_generation_offspring_crossover))) + else: + self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents, + offspring_size=(self.num_offspring, self.num_genes)) + if self.last_generation_offspring_crossover.shape != (self.num_offspring, self.num_genes): + if self.last_generation_offspring_crossover.shape[0] != self.num_offspring: + self.logger.error("Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(crossover_actual_size=self.last_generation_offspring_crossover.shape, crossover_expected_size=(self.num_offspring, self.num_genes), num_offspring=self.num_offspring, num_offspring_produced=self.last_generation_offspring_crossover.shape[0])) + raise ValueError("Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. 
It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(crossover_actual_size=self.last_generation_offspring_crossover.shape, crossover_expected_size=(self.num_offspring, self.num_genes), num_offspring=self.num_offspring, num_offspring_produced=self.last_generation_offspring_crossover.shape[0])) + elif self.last_generation_offspring_crossover.shape[1] != self.num_genes: + self.logger.error("Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(crossover_actual_size=self.last_generation_offspring_crossover.shape, crossover_expected_size=(self.num_offspring, self.num_genes), num_genes=self.num_genes, num_genes_produced=self.last_generation_offspring_crossover.shape[1])) + raise ValueError("Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(crossover_actual_size=self.last_generation_offspring_crossover.shape, crossover_expected_size=(self.num_offspring, self.num_genes), num_genes=self.num_genes, num_genes_produced=self.last_generation_offspring_crossover.shape[1])) + + # PyGAD 2.18.2 // The on_crossover() callback function is called even if crossover_type is None. + if not (self.on_crossover is None): + self.on_crossover(self, self.last_generation_offspring_crossover) + + # If self.mutation_type=None, then no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation. + if self.mutation_type is None: + self.last_generation_offspring_mutation = self.last_generation_offspring_crossover + else: + # Adding some variations to the offspring using mutation. 
+ if callable(self.mutation_type): + self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover, + self) + if not type(self.last_generation_offspring_mutation) is numpy.ndarray: + self.logger.error("The output of the mutation step is expected to be of type (numpy.ndarray) but {last_generation_offspring_mutation_type} found.".format(last_generation_offspring_mutation_type=type(self.last_generation_offspring_mutation))) + raise TypeError("The output of the mutation step is expected to be of type (numpy.ndarray) but {last_generation_offspring_mutation_type} found.".format(last_generation_offspring_mutation_type=type(self.last_generation_offspring_mutation))) + else: + self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover) + + if self.last_generation_offspring_mutation.shape != (self.num_offspring, self.num_genes): + if self.last_generation_offspring_mutation.shape[0] != self.num_offspring: + self.logger.error("Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(mutation_actual_size=self.last_generation_offspring_mutation.shape, mutation_expected_size=(self.num_offspring, self.num_genes), num_offspring=self.num_offspring, num_offspring_produced=self.last_generation_offspring_mutation.shape[0])) + raise ValueError("Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. 
It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(mutation_actual_size=self.last_generation_offspring_mutation.shape, mutation_expected_size=(self.num_offspring, self.num_genes), num_offspring=self.num_offspring, num_offspring_produced=self.last_generation_offspring_mutation.shape[0])) + elif self.last_generation_offspring_mutation.shape[1] != self.num_genes: + self.logger.error("Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(mutation_actual_size=self.last_generation_offspring_mutation.shape, mutation_expected_size=(self.num_offspring, self.num_genes), num_genes=self.num_genes, num_genes_produced=self.last_generation_offspring_mutation.shape[1])) + raise ValueError("Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(mutation_actual_size=self.last_generation_offspring_mutation.shape, mutation_expected_size=(self.num_offspring, self.num_genes), num_genes=self.num_genes, num_genes_produced=self.last_generation_offspring_mutation.shape[1])) + + # PyGAD 2.18.2 // The on_mutation() callback function is called even if mutation_type is None. + if not (self.on_mutation is None): + self.on_mutation(self, self.last_generation_offspring_mutation) + + # Update the population attribute according to the offspring generated. + if self.keep_elitism == 0: + # If the keep_elitism parameter is 0, then the keep_parents parameter will be used to decide if the parents are kept in the next generation. 
+ if (self.keep_parents == 0): + self.population = self.last_generation_offspring_mutation + elif (self.keep_parents == -1): + # Creating the new population based on the parents and offspring. + self.population[0:self.last_generation_parents.shape[0], :] = self.last_generation_parents + self.population[self.last_generation_parents.shape[0]:, :] = self.last_generation_offspring_mutation + elif (self.keep_parents > 0): + parents_to_keep, _ = self.steady_state_selection(self.last_generation_fitness, + num_parents=self.keep_parents) + self.population[0:parents_to_keep.shape[0], :] = parents_to_keep + self.population[parents_to_keep.shape[0]:, :] = self.last_generation_offspring_mutation + else: + self.last_generation_elitism, self.last_generation_elitism_indices = self.steady_state_selection(self.last_generation_fitness, + num_parents=self.keep_elitism) + self.population[0:self.last_generation_elitism.shape[0], :] = self.last_generation_elitism + self.population[self.last_generation_elitism.shape[0]:, :] = self.last_generation_offspring_mutation + + self.generations_completed = generation + 1 # The generations_completed attribute holds the number of the last completed generation. + + self.previous_generation_fitness = self.last_generation_fitness.copy() + # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute. + self.last_generation_fitness = self.cal_pop_fitness() + + best_solution, best_solution_fitness, best_match_idx = self.best_solution(pop_fitness=self.last_generation_fitness) + + # Appending the best solution in the current generation to the best_solutions list. + if self.save_best_solutions: + self.best_solutions.append(best_solution) + + # If the on_generation attribute is not None, then cal the callback function after the generation. 
+ if not (self.on_generation is None): + r = self.on_generation(self) + if type(r) is str and r.lower() == "stop": + # Before aborting the loop, save the fitness value of the best solution. + # _, best_solution_fitness, _ = self.best_solution() + self.best_solutions_fitness.append(best_solution_fitness) + break + + if not self.stop_criteria is None: + for criterion in self.stop_criteria: + if criterion[0] == "reach": + if max(self.last_generation_fitness) >= criterion[1]: + stop_run = True + break + elif criterion[0] == "saturate": + criterion[1] = int(criterion[1]) + if (self.generations_completed >= criterion[1]): + if (self.best_solutions_fitness[self.generations_completed - criterion[1]] - self.best_solutions_fitness[self.generations_completed - 1]) == 0: + stop_run = True + break + + if stop_run: + break + + time.sleep(self.delay_after_gen) + + # Save the fitness of the last generation. + if self.save_solutions: + # self.solutions.extend(self.population.copy()) + population_as_list = self.population.copy() + population_as_list = [list(item) for item in population_as_list] + self.solutions.extend(population_as_list) + + self.solutions_fitness.extend(self.last_generation_fitness) + + # Save the fitness value of the best solution. + _, best_solution_fitness, _ = self.best_solution(pop_fitness=self.last_generation_fitness) + self.best_solutions_fitness.append(best_solution_fitness) + + self.best_solution_generation = numpy.where(numpy.array(self.best_solutions_fitness) == numpy.max(numpy.array(self.best_solutions_fitness)))[0][0] + # After the run() method completes, the run_completed flag is changed from False to True. + self.run_completed = True # Set to True only after the run() method completes gracefully. + + if not (self.on_stop is None): + self.on_stop(self, self.last_generation_fitness) + + # Converting the 'best_solutions' list into a NumPy array. 
def best_solution(self, pop_fitness=None):

    """
    Returns information about the best solution found by the genetic algorithm.
    Accepts the following parameters:
        pop_fitness: An optional list, tuple, or numpy.ndarray (or a subclass of them) holding the fitness values of the solutions in the latest population. If passed, then it saves the time of calculating the fitness. If None, then the 'cal_pop_fitness()' method is called to calculate the fitness of the latest population.
    The following are returned:
        -best_solution: A copy of the best solution in the current population.
        -best_solution_fitness: Fitness value of the best solution.
        -best_match_idx: Index of the best solution in the current population. If several solutions share the highest fitness, the first of them is reported.
    Raises a ValueError if 'pop_fitness' is of an unsupported type or if its length does not match the length of the population.
    """

    if pop_fitness is None:
        # If the 'pop_fitness' parameter is not passed, then we have to call the 'cal_pop_fitness()' method to calculate the fitness of all solutions in the latest population.
        pop_fitness = self.cal_pop_fitness()
    # isinstance() is used rather than comparing type() so that subclasses of list, tuple, and numpy.ndarray are accepted too.
    elif isinstance(pop_fitness, (tuple, list, numpy.ndarray)):
        # Verify that the length of the passed population fitness matches the length of the 'self.population' attribute.
        if len(pop_fitness) != len(self.population):
            # The message is built once and shared between the logger and the exception.
            message = "The length of the list/tuple/numpy.ndarray passed to the 'pop_fitness' parameter ({pop_fitness_length}) must match the length of the 'self.population' attribute ({population_length}).".format(pop_fitness_length=len(pop_fitness), population_length=len(self.population))
            self.logger.error(message)
            raise ValueError(message)
    else:
        message = "The type of the 'pop_fitness' parameter is expected to be list, tuple, or numpy.ndarray but ({pop_fitness_type}) found.".format(pop_fitness_type=type(pop_fitness))
        self.logger.error(message)
        raise ValueError(message)

    # Return the index of the best solution that has the best fitness value.
    # numpy.argmax() returns the first index holding the maximum value.
    best_match_idx = numpy.argmax(numpy.asarray(pop_fitness))

    # A copy is returned so that the caller cannot change the population by editing the returned solution.
    best_solution = self.population[best_match_idx, :].copy()
    best_solution_fitness = pop_fitness[best_match_idx]

    return best_solution, best_solution_fitness, best_match_idx
def save(self, filename):

    """
    Saves the genetic algorithm instance:
        -filename: Name of the file to save the instance. No extension is needed because ".pkl" is appended to it.
    """

    # The instance is serialized before the file is opened. If the serialization fails, then an existing file is not truncated.
    cloudpickle_serialized_object = cloudpickle.dumps(self)
    with open(filename + ".pkl", 'wb') as file:
        # The serialized instance is written exactly once. Dumping the instance a second time into the same file only doubles its size as just the first pickled object is read back by cloudpickle.load().
        file.write(cloudpickle_serialized_object)
+ The parameters are: + line_length: An integer representing the length of the single line in characters. + fill_character: A character to fill the lines. + line_character: A character for creating a line separator. + line_character2: A secondary character to create a line separator. + columns_equal_len: The table rows are split into equal-sized columns or split subjective to the width needed. + print_step_parameters: Whether to print extra parameters about each step inside the step. If print_step_parameters=False and print_parameters_summary=True, then the parameters of each step are printed at the end of the table. + print_parameters_summary: Whether to print parameters summary at the end of the table. If print_step_parameters=False, then the parameters of each step are printed at the end of the table too. + """ + + summary_output = "" + + def fill_message(msg, line_length=line_length, fill_character=fill_character): + num_spaces = int((line_length - len(msg))/2) + num_spaces = int(num_spaces / len(fill_character)) + msg = "{spaces}{msg}{spaces}".format(msg=msg, spaces=fill_character * num_spaces) + return msg + + def line_separator(line_length=line_length, line_character=line_character): + num_characters = int(line_length / len(line_character)) + return line_character * num_characters + + def create_row(columns, line_length=line_length, fill_character=fill_character, split_percentages=None): + filled_columns = [] + if split_percentages == None: + split_percentages = [int(100/len(columns))] * 3 + columns_lengths = [int((split_percentages[idx] * line_length) / 100) for idx in range(len(split_percentages))] + for column_idx, column in enumerate(columns): + current_column_length = len(column) + extra_characters = columns_lengths[column_idx] - current_column_length + filled_column = column + fill_character * extra_characters + filled_column = column + fill_character * extra_characters + filled_columns.append(filled_column) + + return "".join(filled_columns) + + def 
print_parent_selection_params(): + nonlocal summary_output + m = "Number of Parents: {num_parents_mating}".format(num_parents_mating=self.num_parents_mating) + self.logger.info(m) + summary_output = summary_output + m + "\n" + if self.parent_selection_type == "tournament": + m = "K Tournament: {K_tournament}".format(K_tournament=self.K_tournament) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + def print_fitness_params(): + nonlocal summary_output + if not self.fitness_batch_size is None: + m = "Fitness batch size: {fitness_batch_size}".format(fitness_batch_size=self.fitness_batch_size) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + def print_crossover_params(): + nonlocal summary_output + if not self.crossover_probability is None: + m = "Crossover probability: {crossover_probability}".format(crossover_probability=self.crossover_probability) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + def print_mutation_params(): + nonlocal summary_output + if not self.mutation_probability is None: + m = "Mutation Probability: {mutation_probability}".format(mutation_probability=self.mutation_probability) + self.logger.info(m) + summary_output = summary_output + m + "\n" + if self.mutation_percent_genes == "default": + m = "Mutation Percentage: {mutation_percent_genes}".format(mutation_percent_genes=self.mutation_percent_genes) + self.logger.info(m) + summary_output = summary_output + m + "\n" + # Number of mutation genes is already showed above. 
+ m = "Mutation Genes: {mutation_num_genes}".format(mutation_num_genes=self.mutation_num_genes) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Random Mutation Range: ({random_mutation_min_val}, {random_mutation_max_val})".format(random_mutation_min_val=self.random_mutation_min_val, random_mutation_max_val=self.random_mutation_max_val) + self.logger.info(m) + summary_output = summary_output + m + "\n" + if not self.gene_space is None: + m = "Gene Space: {gene_space}".format(gene_space=self.gene_space) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Mutation by Replacement: {mutation_by_replacement}".format(mutation_by_replacement=self.mutation_by_replacement) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Allow Duplicated Genes: {allow_duplicate_genes}".format(allow_duplicate_genes=self.allow_duplicate_genes) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + def print_on_generation_params(): + nonlocal summary_output + if not self.stop_criteria is None: + m = "Stop Criteria: {stop_criteria}".format(stop_criteria=self.stop_criteria) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + def print_params_summary(): + nonlocal summary_output + m = "Population Size: ({sol_per_pop}, {num_genes})".format(sol_per_pop=self.sol_per_pop, num_genes=self.num_genes) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Number of Generations: {num_generations}".format(num_generations=self.num_generations) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Initial Population Range: ({init_range_low}, {init_range_high})".format(init_range_low=self.init_range_low, init_range_high=self.init_range_high) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + if not print_step_parameters: + print_fitness_params() + + if not print_step_parameters: + print_parent_selection_params() + + if self.keep_elitism != 0: + m 
= "Keep Elitism: {keep_elitism}".format(keep_elitism=self.keep_elitism) + self.logger.info(m) + summary_output = summary_output + m + "\n" + else: + m = "Keep Parents: {keep_parents}".format(keep_parents=self.keep_parents) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Gene DType: {gene_type}".format(gene_type=self.gene_type) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + if not print_step_parameters: + print_crossover_params() + + if not print_step_parameters: + print_mutation_params() + + if self.delay_after_gen != 0: + m = "Post-Generation Delay: {delay_after_gen}".format(delay_after_gen=self.delay_after_gen) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + if not print_step_parameters: + print_on_generation_params() + + if not self.parallel_processing is None: + m = "Parallel Processing: {parallel_processing}".format(parallel_processing=self.parallel_processing) + self.logger.info(m) + summary_output = summary_output + m + "\n" + if not self.random_seed is None: + m = "Random Seed: {random_seed}".format(random_seed=self.random_seed) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Save Best Solutions: {save_best_solutions}".format(save_best_solutions=self.save_best_solutions) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = "Save Solutions: {save_solutions}".format(save_solutions=self.save_solutions) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + m = line_separator(line_character=line_character) + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = fill_message("PyGAD Lifecycle") + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = line_separator(line_character=line_character2) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + lifecycle_steps = ["on_start()", "Fitness Function", "On Fitness", "Parent Selection", "On Parents", "Crossover", "On Crossover", 
"Mutation", "On Mutation", "On Generation", "On Stop"] + lifecycle_functions = [self.on_start, self.fitness_func, self.on_fitness, self.select_parents, self.on_parents, self.crossover, self.on_crossover, self.mutation, self.on_mutation, self.on_generation, self.on_stop] + lifecycle_functions = [getattr(lifecycle_func, '__name__', "None") for lifecycle_func in lifecycle_functions] + lifecycle_functions = [lifecycle_func + "()" if lifecycle_func != "None" else "None" for lifecycle_func in lifecycle_functions] + lifecycle_output = ["None", "(1)", "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "None", "None"] + lifecycle_step_parameters = [None, print_fitness_params, None, print_parent_selection_params, None, print_crossover_params, None, print_mutation_params, None, print_on_generation_params, None] + + if not columns_equal_len: + max_lengthes = [max(list(map(len, lifecycle_steps))), max(list(map(len, lifecycle_functions))), max(list(map(len, lifecycle_output)))] + split_percentages = [int((column_len / sum(max_lengthes)) * 100) for column_len in max_lengthes] + else: + split_percentages = None + + header_columns = ["Step", "Handler", "Output Shape"] + header_row = create_row(header_columns, split_percentages=split_percentages) + m = header_row + self.logger.info(m) + summary_output = summary_output + m + "\n" + m = line_separator(line_character=line_character2) + self.logger.info(m) + summary_output = summary_output + m + "\n" + + for lifecycle_idx in range(len(lifecycle_steps)): + lifecycle_column = [lifecycle_steps[lifecycle_idx], lifecycle_functions[lifecycle_idx], lifecycle_output[lifecycle_idx]] + if lifecycle_column[1] == "None": + 
def load(filename):

    """
    Reads a saved instance of the genetic algorithm:
        -filename: Name of the file to read the instance. No extension is needed because ".pkl" is appended to it.
    Returns the genetic algorithm instance.
    Raises a FileNotFoundError if the file does not exist and a BaseException if the file exists but cannot be loaded.
    """

    # NOTE(security): Unpickling a file can execute arbitrary code. Only load files that come from a trusted source.
    try:
        with open(filename + ".pkl", 'rb') as file:
            ga_in = cloudpickle.load(file)
    except FileNotFoundError as error:
        raise FileNotFoundError("Error reading the file {filename}. Please check your inputs.".format(filename=filename)) from error
    except Exception as error:
        # Only regular errors are converted. KeyboardInterrupt and SystemExit are no longer swallowed as they do not inherit from Exception.
        # BaseException is still the raised type so that the existing callers catching it keep working. The original error is chained to make the failure diagnosable.
        raise BaseException("Error loading the file.") from error
    return ga_in
import copy
import numpy
import torch

def model_weights_as_vector(model):

    """
    Flattens all the entries of the model's state_dict() into a single 1D NumPy array.

    model: The PyTorch model to read the parameters from.

    Returns a new 1D NumPy array holding the values of all entries in the order of the state_dict(). An empty array is returned if the state_dict() is empty.
    """

    layers_vectors = []

    for curr_weights in model.state_dict().values():
        # cpu() is called for making sure the data is moved from GPU to CPU.
        # Calling detach() to remove the computational graph from the layer.
        # numpy() is called for converting the tensor into a NumPy array.
        curr_weights = curr_weights.cpu().detach().numpy()
        # The shape is passed positionally because the 'newshape' keyword of numpy.reshape() is deprecated.
        layers_vectors.append(numpy.reshape(curr_weights, (curr_weights.size,)))

    if len(layers_vectors) == 0:
        # numpy.concatenate() does not accept an empty sequence.
        return numpy.array([])

    # numpy.concatenate() always allocates a new array. Thus, the returned vector does not share memory with the model.
    return numpy.concatenate(layers_vectors)

def model_weights_as_dict(model, weights_vector):

    """
    Converts a 1D vector of values into a dictionary that can be passed to the load_state_dict() method of the model.

    model: The PyTorch model whose state_dict() defines the keys, order, and shapes of the entries.
    weights_vector: A 1D vector holding the values of all entries in the same order used by model_weights_as_vector().

    Returns a dictionary mapping each key in the state_dict() to a tensor built from the matching part of the vector. The model itself is not changed.
    """

    weights_dict = model.state_dict()

    start = 0
    for key in weights_dict:
        # Only the shape and the size are needed. They are read from the tensor directly without moving its data.
        layer_weights_shape = tuple(weights_dict[key].shape)
        layer_weights_size = weights_dict[key].numel()

        layer_weights_vector = weights_vector[start:start + layer_weights_size]
        # The shape is passed positionally because the 'newshape' keyword of numpy.reshape() is deprecated.
        layer_weights_matrix = numpy.reshape(layer_weights_vector, layer_weights_shape)
        weights_dict[key] = torch.from_numpy(layer_weights_matrix)

        start = start + layer_weights_size

    return weights_dict

def predict(model, solution, data):

    """
    Makes predictions using the model after loading the parameters of the solution into a copy of it.

    model: The PyTorch model. It is left unchanged.
    solution: A 1D vector holding the parameters to use.
    data: The input passed to the model.

    Returns the output of calling the model copy on the data.
    """

    # Fetch the parameters of the best solution.
    model_weights_dict = model_weights_as_dict(model=model,
                                               weights_vector=solution)

    # Use the current solution as the model parameters.
    # A deep copy is used so that the parameters of the passed model are not overwritten.
    _model = copy.deepcopy(model)
    _model.load_state_dict(model_weights_dict)

    predictions = _model(data)

    return predictions

class TorchGA:

    def __init__(self, model, num_solutions):

        """
        Creates an instance of the TorchGA class to build a population of model parameters.

        model: A PyTorch model class.
        num_solutions: Number of solutions in the population. Each solution has different model parameters.
        """

        self.model = model

        self.num_solutions = num_solutions

        # A list holding the weights of all the solutions (i.e. networks) used in the population.
        self.population_weights = self.create_population()

    def create_population(self):

        """
        Creates the initial population of the genetic algorithm as a list of networks' weights (i.e. solutions). Each element in the list holds a different weights of the PyTorch model.

        The method returns a list holding the weights of all solutions.
        """

        model_weights_vector = model_weights_as_vector(model=self.model)

        # The first solution holds the current weights of the model.
        net_population_weights = [model_weights_vector]

        for _ in range(self.num_solutions - 1):
            # The addition creates a new array. Thus, no explicit copy of the model weights is needed.
            net_weights = model_weights_vector + numpy.random.uniform(low=-1.0, high=1.0, size=model_weights_vector.size)

            # Appending the weights to the population.
            net_population_weights.append(net_weights)

        return net_population_weights
def single_point_crossover(self, parents, offspring_size):

    """
    Applies the single-point crossover. A point is selected randomly for each offspring. The genes before the point are copied from the first parent and the remaining genes are copied from the second parent.
    It accepts 2 parameters:
        -parents: The parents to mate for producing the offspring.
        -offspring_size: The size of the offspring to produce.
    It returns an array of the produced offspring.
    """

    num_parents = parents.shape[0]

    # A single data type is used for the array only if all genes share the same type.
    offspring_dtype = self.gene_type[0] if self.gene_type_single == True else object
    offspring = numpy.empty(offspring_size, dtype=offspring_dtype)

    for k in range(offspring_size[0]):
        # The point at which crossover takes place between two parents.
        crossover_point = numpy.random.randint(low=0, high=parents.shape[1], size=1)[0]

        if self.crossover_probability is None:
            # Without a probability, the parents are mated in order: (0, 1), (1, 2), ... and the last one with the first.
            parent1_idx = k % num_parents
            parent2_idx = (k+1) % num_parents
        else:
            probs = numpy.random.random(size=num_parents)
            indices = numpy.where(probs <= self.crossover_probability)[0]

            if len(indices) == 0:
                # No parent satisfied the probability. No crossover is applied and a parent is copied as is.
                offspring[k, :] = parents[k % num_parents, :]
                continue

            if len(indices) == 1:
                # Only a single parent satisfied the probability. It is mated with itself.
                parent1_idx = parent2_idx = indices[0]
            else:
                parent1_idx, parent2_idx = random.sample(list(set(indices)), 2)

        # The genes before the crossover point come from the first parent.
        offspring[k, :crossover_point] = parents[parent1_idx, :crossover_point]
        # The genes starting from the crossover point come from the second parent.
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]

        if self.allow_duplicate_genes == False:
            if self.gene_space is None:
                offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k],
                                                                         min_val=self.random_mutation_min_val,
                                                                         max_val=self.random_mutation_max_val,
                                                                         mutation_by_replacement=self.mutation_by_replacement,
                                                                         gene_type=self.gene_type,
                                                                         num_trials=10)
            else:
                offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k],
                                                                         gene_type=self.gene_type,
                                                                         num_trials=10)

    return offspring
+ if len(indices) == 0: + offspring[k, :] = parents[k % parents.shape[0], :] + continue + elif len(indices) == 1: + parent1_idx = indices[0] + parent2_idx = parent1_idx + else: + indices = random.sample(list(set(indices)), 2) + parent1_idx = indices[0] + parent2_idx = indices[1] + else: + # Index of the first parent to mate. + parent1_idx = k % parents.shape[0] + # Index of the second parent to mate. + parent2_idx = (k+1) % parents.shape[0] + + # The new offspring has its first half of its genes from the first parent. + offspring[k, 0:crossover_point] = parents[parent1_idx, 0:crossover_point] + # The new offspring has its second half of its genes from the second parent. + offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:] + + if self.allow_duplicate_genes == False: + if self.gene_space is None: + offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + else: + offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], + gene_type=self.gene_type, + num_trials=10) + + return offspring + + def two_points_crossover(self, parents, offspring_size): + + """ + Applies the 2 points crossover. It selects the 2 points randomly at which crossover takes place between the pairs of parents. + It accepts 2 parameters: + -parents: The parents to mate for producing the offspring. + -offspring_size: The size of the offspring to produce. + It returns an array the produced offspring. + """ + + if self.gene_type_single == True: + offspring = numpy.empty(offspring_size, dtype=self.gene_type[0]) + else: + offspring = numpy.empty(offspring_size, dtype=object) + + for k in range(offspring_size[0]): + if (parents.shape[1] == 1): # If the chromosome has only a single gene. In this case, this gene is copied from the second parent. 
+ crossover_point1 = 0 + else: + crossover_point1 = numpy.random.randint(low=0, high=numpy.ceil(parents.shape[1]/2 + 1), size=1)[0] + + crossover_point2 = crossover_point1 + int(parents.shape[1]/2) # The second point must always be greater than the first point. + + if not (self.crossover_probability is None): + probs = numpy.random.random(size=parents.shape[0]) + indices = numpy.where(probs <= self.crossover_probability)[0] + + # If no parent satisfied the probability, no crossover is applied and a parent is selected. + if len(indices) == 0: + offspring[k, :] = parents[k % parents.shape[0], :] + continue + elif len(indices) == 1: + parent1_idx = indices[0] + parent2_idx = parent1_idx + else: + indices = random.sample(list(set(indices)), 2) + parent1_idx = indices[0] + parent2_idx = indices[1] + else: + # Index of the first parent to mate. + parent1_idx = k % parents.shape[0] + # Index of the second parent to mate. + parent2_idx = (k+1) % parents.shape[0] + + # The genes from the beginning of the chromosome up to the first point are copied from the first parent. + offspring[k, 0:crossover_point1] = parents[parent1_idx, 0:crossover_point1] + # The genes from the second point up to the end of the chromosome are copied from the first parent. + offspring[k, crossover_point2:] = parents[parent1_idx, crossover_point2:] + # The genes between the 2 points are copied from the second parent. 
+ offspring[k, crossover_point1:crossover_point2] = parents[parent2_idx, crossover_point1:crossover_point2] + + if self.allow_duplicate_genes == False: + if self.gene_space is None: + offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + else: + offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], + gene_type=self.gene_type, + num_trials=10) + return offspring + + def uniform_crossover(self, parents, offspring_size): + + """ + Applies the uniform crossover. For each gene, a parent out of the 2 mating parents is selected randomly and the gene is copied from it. + It accepts 2 parameters: + -parents: The parents to mate for producing the offspring. + -offspring_size: The size of the offspring to produce. + It returns an array the produced offspring. + """ + + if self.gene_type_single == True: + offspring = numpy.empty(offspring_size, dtype=self.gene_type[0]) + else: + offspring = numpy.empty(offspring_size, dtype=object) + + for k in range(offspring_size[0]): + if not (self.crossover_probability is None): + probs = numpy.random.random(size=parents.shape[0]) + indices = numpy.where(probs <= self.crossover_probability)[0] + + # If no parent satisfied the probability, no crossover is applied and a parent is selected. + if len(indices) == 0: + offspring[k, :] = parents[k % parents.shape[0], :] + continue + elif len(indices) == 1: + parent1_idx = indices[0] + parent2_idx = parent1_idx + else: + indices = random.sample(list(set(indices)), 2) + parent1_idx = indices[0] + parent2_idx = indices[1] + else: + # Index of the first parent to mate. + parent1_idx = k % parents.shape[0] + # Index of the second parent to mate. 
+ parent2_idx = (k+1) % parents.shape[0] + + genes_source = numpy.random.randint(low=0, high=2, size=offspring_size[1]) + for gene_idx in range(offspring_size[1]): + if (genes_source[gene_idx] == 0): + # The gene will be copied from the first parent if the current gene index is 0. + offspring[k, gene_idx] = parents[parent1_idx, gene_idx] + elif (genes_source[gene_idx] == 1): + # The gene will be copied from the second parent if the current gene index is 1. + offspring[k, gene_idx] = parents[parent2_idx, gene_idx] + + if self.allow_duplicate_genes == False: + if self.gene_space is None: + offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + else: + offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], + gene_type=self.gene_type, + num_trials=10) + + return offspring + + def scattered_crossover(self, parents, offspring_size): + + """ + Applies the scattered crossover. It randomly selects the gene from one of the 2 parents. + It accepts 2 parameters: + -parents: The parents to mate for producing the offspring. + -offspring_size: The size of the offspring to produce. + It returns an array the produced offspring. + """ + + if self.gene_type_single == True: + offspring = numpy.empty(offspring_size, dtype=self.gene_type[0]) + else: + offspring = numpy.empty(offspring_size, dtype=object) + + for k in range(offspring_size[0]): + if not (self.crossover_probability is None): + probs = numpy.random.random(size=parents.shape[0]) + indices = numpy.where(probs <= self.crossover_probability)[0] + + # If no parent satisfied the probability, no crossover is applied and a parent is selected. 
+ if len(indices) == 0: + offspring[k, :] = parents[k % parents.shape[0], :] + continue + elif len(indices) == 1: + parent1_idx = indices[0] + parent2_idx = parent1_idx + else: + indices = random.sample(list(set(indices)), 2) + parent1_idx = indices[0] + parent2_idx = indices[1] + else: + # Index of the first parent to mate. + parent1_idx = k % parents.shape[0] + # Index of the second parent to mate. + parent2_idx = (k+1) % parents.shape[0] + + # A 0/1 vector where 0 means the gene is taken from the first parent and 1 means the gene is taken from the second parent. + gene_sources = numpy.random.randint(0, 2, size=self.num_genes) + offspring[k, :] = numpy.where(gene_sources == 0, parents[parent1_idx, :], parents[parent2_idx, :]) + + if self.allow_duplicate_genes == False: + if self.gene_space is None: + offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + else: + offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], + gene_type=self.gene_type, + num_trials=10) + return offspring diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py new file mode 100644 index 0000000..a5eb867 --- /dev/null +++ b/pygad/utils/mutation.py @@ -0,0 +1,822 @@ +""" +The pygad.utils.mutation module has all the built-in mutation operators. +""" + +import numpy +import random + +import pygad + +class Mutation: + def random_mutation(self, offspring): + + """ + Applies the random mutation which changes the values of a number of genes randomly. + The random value is selected either using the 'gene_space' parameter or the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'. + It accepts a single parameter: + -offspring: The offspring to mutate. + It returns an array of the mutated offspring. 
def mutation_by_space(self, offspring):

    """
    Applies the random mutation using the mutation values' space.
    For each solution, 'mutation_num_genes' genes are selected randomly and each one is assigned a value selected from its space in the 'gene_space' attribute.
    It accepts a single parameter:
        -offspring: The offspring to mutate. It is changed in place.
    It returns an array of the mutated offspring using the mutation space.
    """

    def sample_from_range(space):
        # A space of type dict specifies the lower and upper limits (and optionally the step) of a gene.
        # The drawn value is returned as a scalar, not as an array of size 1.
        if 'step' in space.keys():
            return numpy.random.choice(numpy.arange(start=space['low'],
                                                    stop=space['high'],
                                                    step=space['step']),
                                       size=1)[0]
        return numpy.random.uniform(low=space['low'],
                                    high=space['high'],
                                    size=1)[0]

    def sample_different_value(values, current_value):
        # Select a value that is different from the current gene value. If there is no such value, the gene keeps its value.
        values_to_select_from = list(set(values) - set([current_value]))
        if len(values_to_select_from) == 0:
            return current_value
        return random.choice(values_to_select_from)

    # For each offspring, a value from the gene space is selected randomly and assigned to the selected mutated gene.
    for offspring_idx in range(offspring.shape[0]):
        mutation_indices = numpy.array(random.sample(range(0, self.num_genes), self.mutation_num_genes))
        for gene_idx in mutation_indices:
            current_value = offspring[offspring_idx, gene_idx]

            if self.gene_space_nested:
                # Returning the current gene space from the 'gene_space' attribute.
                curr_gene_space = self.gene_space[gene_idx]
                if type(curr_gene_space) in [numpy.ndarray, list]:
                    curr_gene_space = curr_gene_space.copy()

                if type(curr_gene_space) in pygad.GA.supported_int_float_types:
                    # The gene space is a single number. It is used as the new gene value.
                    value_from_space = curr_gene_space
                elif curr_gene_space is None:
                    # The gene has no space. A random value between 'random_mutation_min_val' and 'random_mutation_max_val' replaces the gene or is added to it.
                    rand_val = numpy.random.uniform(low=self.random_mutation_min_val,
                                                    high=self.random_mutation_max_val,
                                                    size=1)[0]
                    if self.mutation_by_replacement:
                        value_from_space = rand_val
                    else:
                        value_from_space = current_value + rand_val
                elif type(curr_gene_space) is dict:
                    value_from_space = sample_from_range(curr_gene_space)
                elif len(curr_gene_space) == 1:
                    # If the gene space has only 1 value, then select it.
                    value_from_space = curr_gene_space[0]
                else:
                    value_from_space = sample_different_value(curr_gene_space, current_value)
            else:
                # Selecting a value randomly from the global gene space in the 'gene_space' attribute.
                if type(self.gene_space) is dict:
                    value_from_space = sample_from_range(self.gene_space)
                else:
                    value_from_space = sample_different_value(self.gene_space, current_value)

            if value_from_space is None:
                # None was selected from the space. It stands for a random value.
                value_from_space = numpy.random.uniform(low=self.random_mutation_min_val,
                                                        high=self.random_mutation_max_val,
                                                        size=1)[0]

            # Assigning the selected value from the space to the gene after converting it to the gene type and rounding it.
            if self.gene_type_single == True:
                gene_dtype, gene_precision = self.gene_type[0], self.gene_type[1]
            else:
                gene_dtype, gene_precision = self.gene_type[gene_idx][0], self.gene_type[gene_idx][1]

            new_value = gene_dtype(value_from_space)
            if not (gene_precision is None):
                new_value = numpy.round(new_value, gene_precision)
            offspring[offspring_idx, gene_idx] = new_value

            # NOTE(review): the duplicates are solved after each mutated gene (not once per solution). Confirm this matches the intended nesting.
            if self.allow_duplicate_genes == False:
                offspring[offspring_idx], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[offspring_idx],
                                                                                     gene_type=self.gene_type,
                                                                                     num_trials=10)
    return offspring
+ if type(curr_gene_space) in pygad.GA.supported_int_float_types: + value_from_space = curr_gene_space + # If the gene space is None, apply mutation by adding a random value between the range defined by the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'. + elif curr_gene_space is None: + rand_val = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + if self.mutation_by_replacement: + value_from_space = rand_val + else: + value_from_space = offspring[offspring_idx, gene_idx] + rand_val + elif type(curr_gene_space) is dict: + # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. + if 'step' in curr_gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], + stop=curr_gene_space['high'], + step=curr_gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=curr_gene_space['low'], + high=curr_gene_space['high'], + size=1) + else: + # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. + # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. + if len(curr_gene_space) == 1: + value_from_space = curr_gene_space[0] + # If the gene space has more than 1 value, then select a new one that is different from the current value. + else: + values_to_select_from = list(set(curr_gene_space) - set([offspring[offspring_idx, gene_idx]])) + + if len(values_to_select_from) == 0: + value_from_space = offspring[offspring_idx, gene_idx] + else: + value_from_space = random.choice(values_to_select_from) + else: + # Selecting a value randomly from the global gene space in the 'gene_space' attribute. 
def mutation_randomly(self, offspring):

    """
    Applies the random mutation based on the 'mutation_num_genes' parameter.
    For each solution, 'mutation_num_genes' genes are selected randomly. For each selected gene, a random value is generated between 'random_mutation_min_val' and 'random_mutation_max_val'.
    The random value replaces the gene if 'mutation_by_replacement' is True. Otherwise, it is added to the gene.
    It accepts a single parameter:
        -offspring: The offspring to mutate. It is changed in place.
    It returns an array of the mutated offspring.
    """

    # Random mutation changes one or more genes in each offspring randomly.
    for offspring_idx in range(offspring.shape[0]):
        mutation_indices = numpy.array(random.sample(range(0, self.num_genes), self.mutation_num_genes))
        for gene_idx in mutation_indices:
            # The data type and the precision of the current gene.
            if self.gene_type_single == True:
                gene_dtype, gene_precision = self.gene_type[0], self.gene_type[1]
            else:
                gene_dtype, gene_precision = self.gene_type[gene_idx][0], self.gene_type[gene_idx][1]

            # Generating a random value. It is extracted as a scalar because converting an array of size 1 using int() or float() is deprecated in NumPy.
            random_value = numpy.random.uniform(low=self.random_mutation_min_val,
                                                high=self.random_mutation_max_val,
                                                size=1)[0]

            # If the mutation_by_replacement attribute is False, then the random value is added to the gene value. Otherwise, it replaces the gene value.
            if not self.mutation_by_replacement:
                random_value = offspring[offspring_idx, gene_idx] + random_value
            random_value = gene_dtype(random_value)

            # Round the gene
            if not (gene_precision is None):
                random_value = numpy.round(random_value, gene_precision)

            offspring[offspring_idx, gene_idx] = random_value

            # NOTE(review): the duplicates are solved after each mutated gene (not once per solution). Confirm this matches the intended nesting.
            if self.allow_duplicate_genes == False:
                offspring[offspring_idx], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[offspring_idx],
                                                                                     min_val=self.random_mutation_min_val,
                                                                                     max_val=self.random_mutation_max_val,
                                                                                     mutation_by_replacement=self.mutation_by_replacement,
                                                                                     gene_type=self.gene_type,
                                                                                     num_trials=10)

    return offspring
For each gene, if its probability is <= that mutation probability, then it will be mutated randomly. + It accepts a single parameter: + -offspring: The offspring to mutate. + It returns an array of the mutated offspring. + """ + + # Random mutation changes one or more gene in each offspring randomly. + for offspring_idx in range(offspring.shape[0]): + probs = numpy.random.random(size=offspring.shape[1]) + for gene_idx in range(offspring.shape[1]): + if probs[gene_idx] <= self.mutation_probability: + # Generating a random value. + random_value = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. + if self.mutation_by_replacement: + if self.gene_type_single == True: + random_value = self.gene_type[0](random_value) + else: + random_value = self.gene_type[gene_idx][0](random_value) + if type(random_value) is numpy.ndarray: + random_value = random_value[0] + # If the mutation_by_replacement attribute is False, then the random value is added to the gene value. 
def swap_mutation(self, offspring):

    """
    Applies the swap mutation which interchanges the values of 2 randomly selected genes.
    The first gene is selected randomly from the first half of the chromosome. The second gene is half a chromosome after it.
    It accepts a single parameter:
        -offspring: The offspring to mutate. It is changed in place.
    It returns an array of the mutated offspring.
    """

    # The distance between the 2 swapped genes. It is also the (exclusive) upper limit of the first gene index.
    half_length = offspring.shape[1] // 2

    # With less than 2 genes there is no pair of genes to swap. randint() raises an exception if the range is empty.
    if half_length == 0:
        return offspring

    for idx in range(offspring.shape[0]):
        mutation_gene1 = numpy.random.randint(low=0, high=half_length, size=1)[0]
        mutation_gene2 = mutation_gene1 + half_length

        offspring[idx, mutation_gene1], offspring[idx, mutation_gene2] = offspring[idx, mutation_gene2], offspring[idx, mutation_gene1]
    return offspring
def scramble_mutation(self, offspring):

    """
    Applies the scramble mutation which selects a subset of genes and shuffles their order randomly.
    The subset is a run of consecutive genes whose length is half the chromosome length and whose start is selected randomly.
    It accepts a single parameter:
        -offspring: The offspring to mutate. It is changed in place.
    It returns an array of the mutated offspring.
    """

    chromosome_length = offspring.shape[1]
    # The number of consecutive genes to scramble.
    subset_length = int(chromosome_length/2)
    # The (exclusive) upper limit of the index at which the subset starts.
    start_limit = numpy.ceil(chromosome_length/2 + 1)

    for solution_idx in range(offspring.shape[0]):
        subset_start = numpy.random.randint(low=0, high=start_limit, size=1)[0]
        subset_end = subset_start + subset_length

        # The indices of the subset are shuffled. The genes at these indices are then written back in the reversed order.
        shuffled_indices = numpy.arange(start=subset_start, stop=subset_end)
        numpy.random.shuffle(shuffled_indices)

        offspring[solution_idx, shuffled_indices] = offspring[solution_idx, shuffled_indices][::-1]
    return offspring
def adaptive_mutation(self, offspring):

    """
    Applies the adaptive mutation by calling the adaptive mutation method that matches the 2 attributes 'mutation_probability' and 'gene_space'.
        -'mutation_probability' is None and 'gene_space' is not None: adaptive_mutation_by_space()
        -'mutation_probability' is None and 'gene_space' is None: adaptive_mutation_randomly()
        -'mutation_probability' is not None and 'gene_space' is not None: adaptive_mutation_probs_by_space()
        -'mutation_probability' is not None and 'gene_space' is None: adaptive_mutation_probs_randomly()
    It accepts a single parameter:
        -offspring: The offspring to mutate.
    It returns what the called method returns.
    """

    # Without a mutation probability, the 'mutation_num_genes' parameter is used in the mutation.
    uses_num_genes = self.mutation_probability is None
    # With a gene space, the mutation values are selected from the space of values of each gene.
    has_gene_space = self.gene_space is not None

    if uses_num_genes and has_gene_space:
        return self.adaptive_mutation_by_space(offspring)
    if uses_num_genes:
        return self.adaptive_mutation_randomly(offspring)
    if has_gene_space:
        return self.adaptive_mutation_probs_by_space(offspring)
    return self.adaptive_mutation_probs_randomly(offspring)
+ """ + + # For each offspring, a value from the gene space is selected randomly and assigned to the selected gene for mutation. + + average_fitness, offspring_fitness = self.adaptive_mutation_population_fitness(offspring) + + # Adaptive mutation changes one or more genes in each offspring randomly. + # The number of genes to mutate depends on the solution's fitness value. + for offspring_idx in range(offspring.shape[0]): + if offspring_fitness[offspring_idx] < average_fitness: + adaptive_mutation_num_genes = self.mutation_num_genes[0] + else: + adaptive_mutation_num_genes = self.mutation_num_genes[1] + mutation_indices = numpy.array(random.sample(range(0, self.num_genes), adaptive_mutation_num_genes)) + for gene_idx in mutation_indices: + + if self.gene_space_nested: + # Returning the current gene space from the 'gene_space' attribute. + if type(self.gene_space[gene_idx]) in [numpy.ndarray, list]: + curr_gene_space = self.gene_space[gene_idx].copy() + else: + curr_gene_space = self.gene_space[gene_idx] + + # If the gene space has only a single value, use it as the new gene value. + if type(curr_gene_space) in pygad.GA.supported_int_float_types: + value_from_space = curr_gene_space + # If the gene space is None, apply mutation by adding a random value between the range defined by the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'. + elif curr_gene_space is None: + rand_val = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + if self.mutation_by_replacement: + value_from_space = rand_val + else: + value_from_space = offspring[offspring_idx, gene_idx] + rand_val + elif type(curr_gene_space) is dict: + # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. 
+ if 'step' in curr_gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], + stop=curr_gene_space['high'], + step=curr_gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=curr_gene_space['low'], + high=curr_gene_space['high'], + size=1) + else: + # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. + # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. + if len(curr_gene_space) == 1: + value_from_space = curr_gene_space[0] + # If the gene space has more than 1 value, then select a new one that is different from the current value. + else: + values_to_select_from = list(set(curr_gene_space) - set([offspring[offspring_idx, gene_idx]])) + + if len(values_to_select_from) == 0: + value_from_space = offspring[offspring_idx, gene_idx] + else: + value_from_space = random.choice(values_to_select_from) + else: + # Selecting a value randomly from the global gene space in the 'gene_space' attribute. + if type(self.gene_space) is dict: + if 'step' in self.gene_space.keys(): + value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']), + size=1) + else: + value_from_space = numpy.random.uniform(low=self.gene_space['low'], + high=self.gene_space['high'], + size=1) + else: + values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]])) + + if len(values_to_select_from) == 0: + value_from_space = offspring[offspring_idx, gene_idx] + else: + value_from_space = random.choice(values_to_select_from) + + + if value_from_space is None: + value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + + # Assinging the selected value from the space to the gene. 
def adaptive_mutation_randomly(self, offspring):

    """
    Applies the adaptive mutation based on the 'mutation_num_genes' parameter.
    A number of genes are selected randomly for mutation. This number is 'mutation_num_genes[0]' if the fitness of the solution is below the average fitness and 'mutation_num_genes[1]' otherwise.
    The average fitness and the fitness of the offspring are returned by adaptive_mutation_population_fitness().
    The random values are selected based on the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'.
    It accepts a single parameter:
        -offspring: The offspring to mutate. It is changed in place.
    It returns an array of the mutated offspring.
    """

    average_fitness, offspring_fitness = self.adaptive_mutation_population_fitness(offspring)

    # Adaptive random mutation changes one or more genes in each offspring randomly.
    # The number of genes to mutate depends on the solution's fitness value.
    for offspring_idx in range(offspring.shape[0]):
        if offspring_fitness[offspring_idx] < average_fitness:
            adaptive_mutation_num_genes = self.mutation_num_genes[0]
        else:
            adaptive_mutation_num_genes = self.mutation_num_genes[1]
        mutation_indices = numpy.array(random.sample(range(0, self.num_genes), adaptive_mutation_num_genes))
        for gene_idx in mutation_indices:
            # The data type and the precision of the current gene.
            if self.gene_type_single == True:
                gene_dtype, gene_precision = self.gene_type[0], self.gene_type[1]
            else:
                gene_dtype, gene_precision = self.gene_type[gene_idx][0], self.gene_type[gene_idx][1]

            # Generating a random value. It is extracted as a scalar because converting an array of size 1 using int() or float() is deprecated in NumPy.
            random_value = numpy.random.uniform(low=self.random_mutation_min_val,
                                                high=self.random_mutation_max_val,
                                                size=1)[0]

            # If the mutation_by_replacement attribute is False, then the random value is added to the gene value. Otherwise, it replaces the gene value.
            if not self.mutation_by_replacement:
                random_value = offspring[offspring_idx, gene_idx] + random_value
            random_value = gene_dtype(random_value)

            # Round the gene
            if not (gene_precision is None):
                random_value = numpy.round(random_value, gene_precision)

            offspring[offspring_idx, gene_idx] = random_value

            # NOTE(review): the duplicates are solved after each mutated gene (not once per solution). Confirm this matches the intended nesting.
            if self.allow_duplicate_genes == False:
                offspring[offspring_idx], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[offspring_idx],
                                                                                     min_val=self.random_mutation_min_val,
                                                                                     max_val=self.random_mutation_max_val,
                                                                                     mutation_by_replacement=self.mutation_by_replacement,
                                                                                     gene_type=self.gene_type,
                                                                                     num_trials=10)
    return offspring
    def adaptive_mutation_probs_by_space(self, offspring):

        """
        Applies the adaptive mutation based on the 2 parameters 'mutation_probability' and 'gene_space'.
        Each gene of a solution is mutated with a probability that depends on the solution fitness:
            -mutation_probability[0] is used when the solution fitness is below the average fitness.
            -mutation_probability[1] is used otherwise.
        The new gene values are selected from the space of values of each gene.
        It accepts a single parameter:
            -offspring: The offspring to mutate. It is modified in place.
        It returns the array of the mutated offspring (the same object passed in).
        """

        # For each offspring, a value from the gene space is selected randomly and assigned to the selected gene for mutation.

        # NOTE(review): adaptive_mutation_population_fitness() is defined outside this block. It is unpacked here as
        # (average fitness, per-offspring fitness) - confirm against its implementation.
        average_fitness, offspring_fitness = self.adaptive_mutation_population_fitness(offspring)

        # Adaptive random mutation changes one or more genes in each offspring randomly.
        # The probability of mutating a gene depends on the solution's fitness value.
        for offspring_idx in range(offspring.shape[0]):
            if offspring_fitness[offspring_idx] < average_fitness:
                adaptive_mutation_probability = self.mutation_probability[0]
            else:
                adaptive_mutation_probability = self.mutation_probability[1]

            # One random number per gene. A gene is mutated when its number does not exceed the adaptive probability.
            probs = numpy.random.random(size=offspring.shape[1])
            for gene_idx in range(offspring.shape[1]):
                if probs[gene_idx] <= adaptive_mutation_probability:
                    if self.gene_space_nested:
                        # Returning the current gene space from the 'gene_space' attribute.
                        # Lists and arrays are copied so the original gene space is not touched.
                        if type(self.gene_space[gene_idx]) in [numpy.ndarray, list]:
                            curr_gene_space = self.gene_space[gene_idx].copy()
                        else:
                            curr_gene_space = self.gene_space[gene_idx]

                        # If the gene space has only a single value, use it as the new gene value.
                        if type(curr_gene_space) in pygad.GA.supported_int_float_types:
                            value_from_space = curr_gene_space
                        # If the gene space is None, apply mutation by adding a random value between the range defined by the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'.
                        elif curr_gene_space is None:
                            rand_val = numpy.random.uniform(low=self.random_mutation_min_val,
                                                            high=self.random_mutation_max_val,
                                                            size=1)
                            # This is the only branch of this method that consults 'mutation_by_replacement'.
                            if self.mutation_by_replacement:
                                value_from_space = rand_val
                            else:
                                value_from_space = offspring[offspring_idx, gene_idx] + rand_val
                        elif type(curr_gene_space) is dict:
                            # Selecting a value randomly from the current gene's space in the 'gene_space' attribute.
                            # With a 'step' key, the value is picked from the discrete range [low, high) built by numpy.arange().
                            # Without it, the value is sampled uniformly between 'low' and 'high'.
                            if 'step' in curr_gene_space.keys():
                                value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'],
                                                                                    stop=curr_gene_space['high'],
                                                                                    step=curr_gene_space['step']),
                                                                       size=1)
                            else:
                                value_from_space = numpy.random.uniform(low=curr_gene_space['low'],
                                                                        high=curr_gene_space['high'],
                                                                        size=1)
                        else:
                            # Selecting a value randomly from the current gene's space in the 'gene_space' attribute.
                            # If the gene space has only 1 value, then select it. The old and new values of the gene are identical.
                            if len(curr_gene_space) == 1:
                                value_from_space = curr_gene_space[0]
                            # If the gene space has more than 1 value, then select a new one that is different from the current value.
                            else:
                                values_to_select_from = list(set(curr_gene_space) - set([offspring[offspring_idx, gene_idx]]))

                                # Nothing else to choose from, so the gene keeps its current value.
                                if len(values_to_select_from) == 0:
                                    value_from_space = offspring[offspring_idx, gene_idx]
                                else:
                                    value_from_space = random.choice(values_to_select_from)
                    else:
                        # Selecting a value randomly from the global gene space in the 'gene_space' attribute.
                        if type(self.gene_space) is dict:
                            if 'step' in self.gene_space.keys():
                                value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'],
                                                                                    stop=self.gene_space['high'],
                                                                                    step=self.gene_space['step']),
                                                                       size=1)
                            else:
                                value_from_space = numpy.random.uniform(low=self.gene_space['low'],
                                                                        high=self.gene_space['high'],
                                                                        size=1)
                        else:
                            # The global space is treated as a collection of values. The current gene value is excluded.
                            values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]]))

                            if len(values_to_select_from) == 0:
                                value_from_space = offspring[offspring_idx, gene_idx]
                            else:
                                value_from_space = random.choice(values_to_select_from)

                    # A None value can reach this point when it is picked from a space holding None
                    # (presumably meaning "no restriction" - TODO confirm). Fall back to a random value.
                    if value_from_space is None:
                        value_from_space = numpy.random.uniform(low=self.random_mutation_min_val,
                                                                high=self.random_mutation_max_val,
                                                                size=1)

                    # Assigning the selected value from the space to the gene.
                    # The value is cast to the gene data type and rounded when a precision is given.
                    if self.gene_type_single == True:
                        if not self.gene_type[1] is None:
                            offspring[offspring_idx, gene_idx] = numpy.round(self.gene_type[0](value_from_space),
                                                                             self.gene_type[1])
                        else:
                            offspring[offspring_idx, gene_idx] = self.gene_type[0](value_from_space)
                    else:
                        if not self.gene_type[gene_idx][1] is None:
                            offspring[offspring_idx, gene_idx] = numpy.round(self.gene_type[gene_idx][0](value_from_space),
                                                                             self.gene_type[gene_idx][1])
                        else:
                            offspring[offspring_idx, gene_idx] = self.gene_type[gene_idx][0](value_from_space)

                    # NOTE(review): solve_duplicate_genes_by_space() is defined outside this block. Only the first
                    # element of its returned triple (the solution) is used here.
                    if self.allow_duplicate_genes == False:
                        offspring[offspring_idx], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[offspring_idx],
                                                                                             gene_type=self.gene_type,
                                                                                             num_trials=10)
        return offspring
+ if self.gene_type_single == True: + if not self.gene_type[1] is None: + offspring[offspring_idx, gene_idx] = numpy.round(self.gene_type[0](value_from_space), + self.gene_type[1]) + else: + offspring[offspring_idx, gene_idx] = self.gene_type[0](value_from_space) + else: + if not self.gene_type[gene_idx][1] is None: + offspring[offspring_idx, gene_idx] = numpy.round(self.gene_type[gene_idx][0](value_from_space), + self.gene_type[gene_idx][1]) + else: + offspring[offspring_idx, gene_idx] = self.gene_type[gene_idx][0](value_from_space) + + if self.allow_duplicate_genes == False: + offspring[offspring_idx], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[offspring_idx], + gene_type=self.gene_type, + num_trials=10) + return offspring + + def adaptive_mutation_probs_randomly(self, offspring): + + """ + Applies the adaptive mutation based on the 'mutation_probability' parameter. + Based on whether the solution fitness is above or below a threshold, the mutation is applied diffrently by mutating high or low number of genes. + The random values are selected based on the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val'. + It accepts a single parameter: + -offspring: The offspring to mutate. + It returns an array of the mutated offspring. + """ + + average_fitness, offspring_fitness = self.adaptive_mutation_population_fitness(offspring) + + # Adaptive random mutation changes one or more genes in each offspring randomly. + # The probability of mutating a gene depends on the solution's fitness value. + for offspring_idx in range(offspring.shape[0]): + if offspring_fitness[offspring_idx] < average_fitness: + adaptive_mutation_probability = self.mutation_probability[0] + else: + adaptive_mutation_probability = self.mutation_probability[1] + + probs = numpy.random.random(size=offspring.shape[1]) + for gene_idx in range(offspring.shape[1]): + if probs[gene_idx] <= adaptive_mutation_probability: + # Generating a random value. 
+ random_value = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. + if self.mutation_by_replacement: + if self.gene_type_single == True: + random_value = self.gene_type[0](random_value) + else: + random_value = self.gene_type[gene_idx][0](random_value) + if type(random_value) is numpy.ndarray: + random_value = random_value[0] + # If the mutation_by_replacement attribute is False, then the random value is added to the gene value. + else: + if self.gene_type_single == True: + random_value = self.gene_type[0](offspring[offspring_idx, gene_idx] + random_value) + else: + random_value = self.gene_type[gene_idx][0](offspring[offspring_idx, gene_idx] + random_value) + if type(random_value) is numpy.ndarray: + random_value = random_value[0] + + if self.gene_type_single == True: + if not self.gene_type[1] is None: + random_value = numpy.round(random_value, self.gene_type[1]) + else: + if not self.gene_type[gene_idx][1] is None: + random_value = numpy.round(random_value, self.gene_type[gene_idx][1]) + + offspring[offspring_idx, gene_idx] = random_value + + if self.allow_duplicate_genes == False: + offspring[offspring_idx], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[offspring_idx], + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + return offspring diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py new file mode 100644 index 0000000..7f32181 --- /dev/null +++ b/pygad/utils/parent_selection.py @@ -0,0 +1,191 @@ +""" +The pygad.utils.parent_selection module has all the built-in parent selection operators. 
class ParentSelection:

    """
    The built-in parent selection operators.
    The methods read the following attributes from the instance they are bound to: 'population', 'gene_type', 'gene_type_single', 'K_tournament' (tournament selection only), and 'logger' (wheel-based selections only).
    Every operator accepts the fitness values and the number of parents, and returns 2 values:
        1) An array of the selected parents with shape (num_parents, number of genes).
        2) An array of the indices of the selected parents inside the population.
    """

    def _allocate_parents(self, num_parents):

        """
        Creates an uninitialized array to hold the selected parents.
        The array data type is the single gene type when 'gene_type_single' is True and object otherwise.
        """

        if self.gene_type_single:
            return numpy.empty((num_parents, self.population.shape[1]), dtype=self.gene_type[0])
        return numpy.empty((num_parents, self.population.shape[1]), dtype=object)

    def _select_fittest(self, fitness, num_parents):

        """
        Selects the 'num_parents' solutions with the highest fitness, best first.
        """

        # The ascending sort followed by reverse() is kept on purpose. For solutions with equal
        # fitness, it places the solution with the higher index first. Using sorted(reverse=True)
        # would change this order.
        fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k])
        fitness_sorted.reverse()

        parents = self._allocate_parents(num_parents)
        for parent_num in range(num_parents):
            parents[parent_num, :] = self.population[fitness_sorted[parent_num], :].copy()

        return parents, numpy.array(fitness_sorted[:num_parents])

    def _wheel_ranges(self, fitness):

        """
        Builds the roulette wheel out of the fitness values.
        Each solution gets the range [start, end) whose width is its share of the fitness sum. The ranges are laid out in ascending order of the probabilities.
        It returns 2 arrays, indexed by the solution index, holding the start and the end of each range.
        Raises ZeroDivisionError if the sum of the fitness values is zero.
        """

        fitness_sum = numpy.sum(fitness)
        if fitness_sum == 0:
            self.logger.error("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
            raise ZeroDivisionError("Cannot proceed because the sum of fitness values is zero. Cannot divide by zero.")
        probs = numpy.asarray(fitness) / fitness_sum

        probs_start = numpy.zeros(probs.shape, dtype=float) # An array holding the start values of the ranges of probabilities.
        probs_end = numpy.zeros(probs.shape, dtype=float) # An array holding the end values of the ranges of probabilities.

        curr = 0.0

        # A stable sort visits equal probabilities in index order, exactly as repeatedly
        # extracting the first minimum does, without overwriting the probabilities.
        for idx in numpy.argsort(probs, kind="stable"):
            probs_start[idx] = curr
            curr = curr + probs[idx]
            probs_end[idx] = curr

        return probs_start, probs_end

    @staticmethod
    def _wheel_lookup(probs_start, probs_end, pointer):

        """
        Returns the index of the first solution whose range [start, end) holds the pointer.
        """

        hits = numpy.flatnonzero((pointer >= probs_start) & (pointer < probs_end))
        if hits.size > 0:
            return int(hits[0])
        # The accumulated probabilities may end slightly below 1.0 because of rounding. A pointer
        # landing in that gap is given to the range that ends last instead of leaving the parent
        # unselected (i.e. an uninitialized row and a missing index).
        return int(numpy.argmax(probs_end))

    def steady_state_selection(self, fitness, num_parents):

        """
        Selects the parents using the steady-state selection technique. Later, these parents will mate to produce the offspring.
        The solutions with the highest fitness are selected.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        """

        return self._select_fittest(fitness, num_parents)

    def rank_selection(self, fitness, num_parents):

        """
        Selects the parents using the rank selection technique. Later, these parents will mate to produce the offspring.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        """

        # NOTE(review): This is the same deterministic selection applied by steady_state_selection().
        # No rank-based probabilities are used. It is kept as is to not change the results of existing runs.
        return self._select_fittest(fitness, num_parents)

    def random_selection(self, fitness, num_parents):

        """
        Selects the parents randomly. Later, these parents will mate to produce the offspring.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population. Only its length is used.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        """

        parents = self._allocate_parents(num_parents)

        # len() is used, as in tournament_selection(), so that lists are accepted in addition to NumPy arrays.
        rand_indices = numpy.random.randint(low=0, high=len(fitness), size=num_parents)

        for parent_num in range(num_parents):
            parents[parent_num, :] = self.population[rand_indices[parent_num], :].copy()

        return parents, rand_indices

    def tournament_selection(self, fitness, num_parents):

        """
        Selects the parents using the tournament selection technique. Later, these parents will mate to produce the offspring.
        For each parent, 'K_tournament' solutions are drawn randomly (with replacement) and the one with the highest fitness is selected.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        """

        parents = self._allocate_parents(num_parents)
        fitness = numpy.asarray(fitness)

        parents_indices = []

        for parent_num in range(num_parents):
            rand_indices = numpy.random.randint(low=0, high=len(fitness), size=self.K_tournament)
            K_fitnesses = fitness[rand_indices]
            # The first of the drawn solutions wins if the highest fitness is repeated.
            selected_parent_idx = numpy.where(K_fitnesses == numpy.max(K_fitnesses))[0][0]
            parents_indices.append(rand_indices[selected_parent_idx])
            parents[parent_num, :] = self.population[rand_indices[selected_parent_idx], :].copy()

        return parents, numpy.array(parents_indices)

    def roulette_wheel_selection(self, fitness, num_parents):

        """
        Selects the parents using the roulette wheel selection technique. Later, these parents will mate to produce the offspring.
        A solution is selected with a probability equal to its share of the sum of the fitness values.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        Raises ZeroDivisionError if the sum of the fitness values is zero.
        """

        # Calculating the probabilities of the solutions to form a roulette wheel.
        probs_start, probs_end = self._wheel_ranges(fitness)

        parents = self._allocate_parents(num_parents)

        parents_indices = []

        for parent_num in range(num_parents):
            rand_prob = numpy.random.rand()
            idx = self._wheel_lookup(probs_start, probs_end, rand_prob)
            parents[parent_num, :] = self.population[idx, :].copy()
            parents_indices.append(idx)
        return parents, numpy.array(parents_indices)

    def stochastic_universal_selection(self, fitness, num_parents):

        """
        Selects the parents using the stochastic universal selection technique. Later, these parents will mate to produce the offspring.
        A single random number places the first pointer on the wheel. The other pointers follow it at equal distances.
        It accepts 2 parameters:
            -fitness: The fitness values of the solutions in the current population.
            -num_parents: The number of parents to be selected.
        It returns the array of the selected parents and the array of their indices in the population.
        Raises ZeroDivisionError if the sum of the fitness values is zero.
        """

        # Calculating the probabilities of the solutions to form a roulette wheel.
        probs_start, probs_end = self._wheel_ranges(fitness)

        # The distance depends on the number of pointers actually placed (num_parents) so that all
        # of them stay inside the wheel. Using 'num_parents_mating' gives the same distance only
        # when both numbers are equal. max() guards the division when no parents are requested.
        pointers_distance = 1.0 / max(num_parents, 1) # Distance between different pointers.
        first_pointer = numpy.random.uniform(low=0.0, high=pointers_distance, size=1)[0] # Location of the first pointer.

        parents = self._allocate_parents(num_parents)

        parents_indices = []

        for parent_num in range(num_parents):
            rand_pointer = first_pointer + parent_num*pointers_distance
            idx = self._wheel_lookup(probs_start, probs_end, rand_pointer)
            parents[parent_num, :] = self.population[idx, :].copy()
            parents_indices.append(idx)
        return parents, numpy.array(parents_indices)
class Plot:

    """
    The plotting methods.
    The methods read the following attributes from the instance they are bound to: 'generations_completed', 'best_solutions_fitness', 'solutions', 'best_solutions', 'save_solutions', 'save_best_solutions', 'sol_per_pop', 'num_genes', 'suppress_warnings', and 'logger'.
    """

    @staticmethod
    def _draw_series(target, values, plot_type, linewidth, color):

        """
        Draws the values on the target according to the plot type ("plot", "scatter", or "bar").
        The target is either the matplotlib.pyplot module or an axes object as both offer the 3 drawing functions.
        Nothing is drawn for any other plot type.
        """

        if plot_type == "plot":
            target.plot(values, linewidth=linewidth, color=color)
        elif plot_type == "scatter":
            target.scatter(range(len(values)), values, linewidth=linewidth, color=color)
        elif plot_type == "bar":
            target.bar(range(len(values)), values, linewidth=linewidth, color=color)

    def _create_gene_axes(self, cell_width, row_height, grid_width):

        """
        Creates a figure with one subplot per gene arranged in rows of up to 5 subplots.
        It accepts the following:
            -cell_width: Width of a single subplot. Used when all the genes fit in a single row.
            -row_height: Height of a single row of subplots.
            -grid_width: Width of the entire figure. Used when there is more than one row.
        It returns the figure and the list of axes, one for each gene in order. Any spare axes in the last row are left out of the list.
        """

        num_rows = int(numpy.ceil(self.num_genes/5.0))
        num_cols = int(numpy.ceil(self.num_genes/num_rows))

        # squeeze=False always returns a 2D array of axes. This way, a single gene, a single
        # row, and a complete grid are all handled by the same code.
        fig, axs = matplotlib.pyplot.subplots(num_rows, num_cols, squeeze=False)

        if num_rows == 1:
            fig.set_figwidth(cell_width * num_cols)
            fig.set_figheight(row_height)
        else:
            fig.set_figwidth(grid_width)
            fig.set_figheight(row_height*num_rows)

        return fig, list(axs.flat)[:self.num_genes]

    def plot_result(self,
                    title="PyGAD - Generation vs. Fitness",
                    xlabel="Generation",
                    ylabel="Fitness",
                    linewidth=3,
                    font_size=14,
                    plot_type="plot",
                    color="#3870FF",
                    save_dir=None):

        """
        Deprecated. Please use the plot_fitness() method instead.
        A warning is given, unless 'suppress_warnings' is True, and then all the parameters are passed to plot_fitness().

        Returns the figure returned by plot_fitness().
        """

        if not self.suppress_warnings:
            warnings.warn("Please use the plot_fitness() method instead of plot_result(). The plot_result() method will be removed in the future.")

        return self.plot_fitness(title=title,
                                 xlabel=xlabel,
                                 ylabel=ylabel,
                                 linewidth=linewidth,
                                 font_size=font_size,
                                 plot_type=plot_type,
                                 color=color,
                                 save_dir=save_dir)

    def plot_fitness(self,
                     title="PyGAD - Generation vs. Fitness",
                     xlabel="Generation",
                     ylabel="Fitness",
                     linewidth=3,
                     font_size=14,
                     plot_type="plot",
                     color="#3870FF",
                     save_dir=None):

        """
        Creates, shows, and returns a figure that summarizes how the fitness value evolved by generation. Can only be called after completing at least 1 generation. If no generation is completed, an exception is raised.

        Accepts the following:
            title: Figure title.
            xlabel: Label on the X-axis.
            ylabel: Label on the Y-axis.
            linewidth: Line width of the plot. Defaults to 3.
            font_size: Font size for the labels and title. Defaults to 14.
            plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". Nothing is drawn for any other value.
            color: Color of the plot which defaults to "#3870FF".
            save_dir: Directory to save the figure.

        Returns the figure.
        """

        if self.generations_completed < 1:
            self.logger.error("The plot_fitness() (i.e. plot_result()) method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))
            raise RuntimeError("The plot_fitness() (i.e. plot_result()) method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))

        fig = matplotlib.pyplot.figure()
        self._draw_series(matplotlib.pyplot, self.best_solutions_fitness, plot_type, linewidth, color)
        matplotlib.pyplot.title(title, fontsize=font_size)
        matplotlib.pyplot.xlabel(xlabel, fontsize=font_size)
        matplotlib.pyplot.ylabel(ylabel, fontsize=font_size)

        if save_dir is not None:
            matplotlib.pyplot.savefig(fname=save_dir,
                                      bbox_inches='tight')
        matplotlib.pyplot.show()

        return fig

    def plot_new_solution_rate(self,
                               title="PyGAD - Generation vs. New Solution Rate",
                               xlabel="Generation",
                               ylabel="New Solution Rate",
                               linewidth=3,
                               font_size=14,
                               plot_type="plot",
                               color="#3870FF",
                               save_dir=None):

        """
        Creates, shows, and returns a figure that summarizes the rate of exploring new solutions. This method works only when save_solutions=True in the constructor of the pygad.GA class.
        For each generation, the plotted value is the number of its solutions that did not appear in any earlier generation.

        Accepts the following:
            title: Figure title.
            xlabel: Label on the X-axis.
            ylabel: Label on the Y-axis.
            linewidth: Line width of the plot. Defaults to 3.
            font_size: Font size for the labels and title. Defaults to 14.
            plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". Nothing is drawn for any other value.
            color: Color of the plot which defaults to "#3870FF".
            save_dir: Directory to save the figure.

        Returns the figure.
        """

        if self.generations_completed < 1:
            self.logger.error("The plot_new_solution_rate() method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))
            raise RuntimeError("The plot_new_solution_rate() method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))

        if self.save_solutions == False:
            self.logger.error("The plot_new_solution_rate() method works only when save_solutions=True in the constructor of the pygad.GA class.")
            raise RuntimeError("The plot_new_solution_rate() method works only when save_solutions=True in the constructor of the pygad.GA class.")

        unique_solutions = set()
        num_unique_solutions_per_generation = []
        for generation_idx in range(self.generations_completed):

            len_before = len(unique_solutions)

            # The saved solutions are stored consecutively, 'sol_per_pop' solutions for each generation.
            start = generation_idx * self.sol_per_pop
            end = start + self.sol_per_pop

            # Solutions are converted to tuples to be hashable.
            unique_solutions.update(tuple(sol) for sol in self.solutions[start:end])

            # The growth of the set is the number of solutions seen for the first time.
            num_unique_solutions_per_generation.append(len(unique_solutions) - len_before)

        fig = matplotlib.pyplot.figure()
        self._draw_series(matplotlib.pyplot, num_unique_solutions_per_generation, plot_type, linewidth, color)
        matplotlib.pyplot.title(title, fontsize=font_size)
        matplotlib.pyplot.xlabel(xlabel, fontsize=font_size)
        matplotlib.pyplot.ylabel(ylabel, fontsize=font_size)

        if save_dir is not None:
            matplotlib.pyplot.savefig(fname=save_dir,
                                      bbox_inches='tight')
        matplotlib.pyplot.show()

        return fig

    def plot_genes(self,
                   title="PyGAD - Gene",
                   xlabel="Gene",
                   ylabel="Value",
                   linewidth=3,
                   font_size=14,
                   plot_type="plot",
                   graph_type="plot",
                   fill_color="#3870FF",
                   color="black",
                   solutions="all",
                   save_dir=None):

        """
        Creates, shows, and returns a figure that describes the values of the genes.
        If graph_type is "plot" or "histogram", the figure has a number of subplots equal to the number of genes. If it is "boxplot", a single plot has one box for each gene.
        This method works only when save_solutions=True (for solutions="all") or save_best_solutions=True (for solutions="best") in the constructor of the pygad.GA class.
        It also works only after completing at least 1 generation. If no generation is completed, an exception is raised.

        Accepts the following:
            title: Figure title.
            xlabel: Label on the X-axis. Used only if graph_type="boxplot".
            ylabel: Label on the Y-axis. Used only if graph_type="boxplot".
            linewidth: Line width of the plot. Defaults to 3.
            font_size: Font size for the labels and title. Defaults to 14.
            plot_type: Type of the plot which can be either "plot" (default), "scatter", or "bar". Used only if graph_type="plot".
            graph_type: Type of the graph which can be either "plot" (default), "boxplot", or "histogram".
            fill_color: Fill color of the graph which defaults to "#3870FF". It is also the color of the series when graph_type="plot".
            color: Color of the whiskers, medians, and caps which defaults to "black". Used only if graph_type="boxplot".
            solutions: Defaults to "all" which means use all solutions. If "best" then only the best solutions are used.
            save_dir: Directory to save the figure.

        Returns the figure.
        """

        if self.generations_completed < 1:
            self.logger.error("The plot_genes() method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))
            raise RuntimeError("The plot_genes() method can only be called after completing at least 1 generation but ({generations_completed}) is completed.".format(generations_completed=self.generations_completed))

        if type(solutions) is str:
            if solutions == 'all':
                if self.save_solutions:
                    solutions_to_plot = numpy.array(self.solutions)
                else:
                    self.logger.error("The plot_genes() method with solutions='all' can only be called if 'save_solutions=True' in the pygad.GA class constructor.")
                    raise RuntimeError("The plot_genes() method with solutions='all' can only be called if 'save_solutions=True' in the pygad.GA class constructor.")
            elif solutions == 'best':
                if self.save_best_solutions:
                    # Converted to an array, as done for solutions='all', because the columns are indexed below.
                    solutions_to_plot = numpy.array(self.best_solutions)
                else:
                    self.logger.error("The plot_genes() method with solutions='best' can only be called if 'save_best_solutions=True' in the pygad.GA class constructor.")
                    raise RuntimeError("The plot_genes() method with solutions='best' can only be called if 'save_best_solutions=True' in the pygad.GA class constructor.")
            else:
                self.logger.error("The solutions parameter can be either 'all' or 'best' but {solutions} found.".format(solutions=solutions))
                raise RuntimeError("The solutions parameter can be either 'all' or 'best' but {solutions} found.".format(solutions=solutions))
        else:
            self.logger.error("The solutions parameter must be a string but {solutions_type} found.".format(solutions_type=type(solutions)))
            raise RuntimeError("The solutions parameter must be a string but {solutions_type} found.".format(solutions_type=type(solutions)))

        if graph_type == "plot":
            fig, gene_axes = self._create_gene_axes(cell_width=5, row_height=4, grid_width=25)

            # The plot type is applied to every gene, including the case of a single gene.
            for gene_idx, ax in enumerate(gene_axes):
                self._draw_series(ax, solutions_to_plot[:, gene_idx], plot_type, linewidth, fill_color)
                ax.set_xlabel("Gene " + str(gene_idx), fontsize=font_size)

            fig.suptitle(title, fontsize=font_size, y=1.001)
            matplotlib.pyplot.tight_layout()

        elif graph_type == "boxplot":
            fig = matplotlib.pyplot.figure(1, figsize=(0.7*self.num_genes, 6))

            # Create an axes instance
            ax = fig.add_subplot(111)
            boxplots = ax.boxplot(solutions_to_plot,
                                  labels=range(self.num_genes),
                                  patch_artist=True)
            # adding horizontal grid lines
            ax.yaxis.grid(True)

            for box in boxplots['boxes']:
                # change outline color
                box.set(color='black', linewidth=linewidth)
                # change fill color https://color.adobe.com/create/color-wheel
                box.set_facecolor(fill_color)

            for part in ('whiskers', 'medians', 'caps'):
                for line in boxplots[part]:
                    line.set(color=color, linewidth=linewidth)

            matplotlib.pyplot.title(title, fontsize=font_size)
            matplotlib.pyplot.xlabel(xlabel, fontsize=font_size)
            matplotlib.pyplot.ylabel(ylabel, fontsize=font_size)
            matplotlib.pyplot.tight_layout()

        elif graph_type == "histogram":
            fig, gene_axes = self._create_gene_axes(cell_width=4, row_height=3, grid_width=20)

            for gene_idx, ax in enumerate(gene_axes):
                ax.hist(solutions_to_plot[:, gene_idx],
                        color=fill_color,
                        rwidth=0.95)
                ax.set_xlabel("Gene " + str(gene_idx), fontsize=font_size)

            fig.suptitle(title, fontsize=font_size, y=1.001)
            matplotlib.pyplot.tight_layout()

        else:
            # Without this check, an unsupported graph type fails later with an unclear error because no figure is created.
            self.logger.error("The graph_type parameter can be either 'plot', 'boxplot', or 'histogram' but {graph_type} found.".format(graph_type=graph_type))
            raise RuntimeError("The graph_type parameter can be either 'plot', 'boxplot', or 'histogram' but {graph_type} found.".format(graph_type=graph_type))

        if save_dir is not None:
            matplotlib.pyplot.savefig(fname=save_dir,
                                      bbox_inches='tight')

        matplotlib.pyplot.show()

        return fig
numpy.int32, numpy.int64, numpy.uint, numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64] - supported_float_types = [float, numpy.float16, numpy.float32, numpy.float64] - supported_int_float_types = supported_int_types + supported_float_types - - def __init__(self, - num_generations, - num_parents_mating, - fitness_func, - fitness_batch_size=None, - initial_population=None, - sol_per_pop=None, - num_genes=None, - init_range_low=-4, - init_range_high=4, - gene_type=float, - parent_selection_type="sss", - keep_parents=-1, - keep_elitism=1, - K_tournament=3, - crossover_type="single_point", - crossover_probability=None, - mutation_type="random", - mutation_probability=None, - mutation_by_replacement=False, - mutation_percent_genes='default', - mutation_num_genes=None, - random_mutation_min_val=-1.0, - random_mutation_max_val=1.0, - gene_space=None, - allow_duplicate_genes=True, - on_start=None, - on_fitness=None, - on_parents=None, - on_crossover=None, - on_mutation=None, - on_generation=None, - on_stop=None, - delay_after_gen=0.0, - save_best_solutions=False, - save_solutions=False, - suppress_warnings=False, - stop_criteria=None, - parallel_processing=None, - random_seed=None, - logger=None): - - """ - The constructor of the GA class accepts all parameters required to create an instance of the GA class. It validates such parameters. - - num_generations: Number of generations. - num_parents_mating: Number of solutions to be selected as parents in the mating pool. - - fitness_func: Accepts a function/method and returns the fitness value of the solution. In PyGAD 2.20.0, a third parameter is passed referring to the 'pygad.GA' instance. If method, then it must accept 4 parameters where the fourth one refers to the method's object. - fitness_batch_size: Added in PyGAD 2.19.0. Supports calculating the fitness in batches. If the value is 1 or None, then the fitness function is called for each invidiaul solution. 
If given another value X where X is neither 1 nor None (e.g. X=3), then the fitness function is called once for each X (3) solutions. - - initial_population: A user-defined initial population. It is useful when the user wants to start the generations with a custom initial population. It defaults to None which means no initial population is specified by the user. In this case, PyGAD creates an initial population using the 'sol_per_pop' and 'num_genes' parameters. An exception is raised if the 'initial_population' is None while any of the 2 parameters ('sol_per_pop' or 'num_genes') is also None. - sol_per_pop: Number of solutions in the population. - num_genes: Number of parameters in the function. - - init_range_low: The lower value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20 and higher. - init_range_high: The upper value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20. - # It is OK to set the value of any of the 2 parameters ('init_range_low' and 'init_range_high') to be equal, higher or lower than the other parameter (i.e. init_range_low is not needed to be lower than init_range_high). - - gene_type: The type of the gene. It is assigned to any of these types (int, float, numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.uint, numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64, numpy.float16, numpy.float32, numpy.float64) and forces all the genes to be of that type. - - parent_selection_type: Type of parent selection. - keep_parents: If 0, this means no parent in the current population will be used in the next population. If -1, this means all parents in the current population will be used in the next population. If set to a value > 0, then the specified value refers to the number of parents in the current population to be used in the next population. 
Some parent selection operators such as rank selection, favor population diversity and therefore keeping the parents in the next generation can be beneficial. However, some other parent selection operators, such as roulette wheel selection (RWS), have higher selection pressure and keeping more than one parent in the next generation can seriously harm population diversity. This parameter have an effect only when the keep_elitism parameter is 0. Thanks to Prof. Fernando Jiménez Barrionuevo (http://webs.um.es/fernan) for editing this sentence. - K_tournament: When the value of 'parent_selection_type' is 'tournament', the 'K_tournament' parameter specifies the number of solutions from which a parent is selected randomly. - - keep_elitism: Added in PyGAD 2.18.0. It can take the value 0 or a positive integer that satisfies (0 <= keep_elitism <= sol_per_pop). It defaults to 1 which means only the best solution in the current generation is kept in the next generation. If assigned 0, this means it has no effect. If assigned a positive integer K, then the best K solutions are kept in the next generation. It cannot be assigned a value greater than the value assigned to the sol_per_pop parameter. If this parameter has a value different than 0, then the keep_parents parameter will have no effect. - - crossover_type: Type of the crossover opreator. If crossover_type=None, then the crossover step is bypassed which means no crossover is applied and thus no offspring will be created in the next generations. The next generation will use the solutions in the current population. - crossover_probability: The probability of selecting a solution for the crossover operation. If the solution probability is <= crossover_probability, the solution is selected. The value must be between 0 and 1 inclusive. - - mutation_type: Type of the mutation opreator. 
If mutation_type=None, then the mutation step is bypassed which means no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation. - mutation_probability: The probability of selecting a gene for the mutation operation. If the gene probability is <= mutation_probability, the gene is selected. It accepts either a single value for fixed mutation or a list/tuple/numpy.ndarray of 2 values for adaptive mutation. The values must be between 0 and 1 inclusive. If specified, then no need for the 2 parameters mutation_percent_genes and mutation_num_genes. - - mutation_by_replacement: An optional bool parameter. It works only when the selected type of mutation is random (mutation_type="random"). In this case, setting mutation_by_replacement=True means replace the gene by the randomly generated value. If False, then it has no effect and random mutation works by adding the random value to the gene. - - mutation_percent_genes: Percentage of genes to mutate which defaults to the string 'default' which means 10%. This parameter has no action if any of the 2 parameters mutation_probability or mutation_num_genes exist. - mutation_num_genes: Number of genes to mutate which defaults to None. If the parameter mutation_num_genes exists, then no need for the parameter mutation_percent_genes. This parameter has no action if the mutation_probability parameter exists. - random_mutation_min_val: The minimum value of the range from which a random value is selected to be added to the selected gene(s) to mutate. It defaults to -1.0. - random_mutation_max_val: The maximum value of the range from which a random value is selected to be added to the selected gene(s) to mutate. It defaults to 1.0. - - gene_space: It accepts a list of all possible values of the gene. This list is used in the mutation step. Should be used only if the gene space is a set of discrete values. 
No need for the 2 parameters (random_mutation_min_val and random_mutation_max_val) if the parameter gene_space exists. Added in PyGAD 2.5.0. In PyGAD 2.11.0, the gene_space can be assigned a dict. - - on_start: Accepts a function/method to be called only once before the genetic algorithm starts its evolution. If function, then it must accept a single parameter representing the instance of the genetic algorithm. If method, then it must accept 2 parameters where the second one refers to the method's object. Added in PyGAD 2.6.0. - on_fitness: Accepts a function/method to be called after calculating the fitness values of all solutions in the population. If function, then it must accept 2 parameters: 1) a list of all solutions' fitness values 2) the instance of the genetic algorithm. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. - on_parents: Accepts a function/method to be called after selecting the parents that mates. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the selected parents. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. - on_crossover: Accepts a function/method to be called each time the crossover operation is applied. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring generated using crossover. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. - on_mutation: Accepts a function/method to be called each time the mutation operation is applied. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one represents the offspring after applying the mutation. 
If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. - on_generation: Accepts a function/method to be called after each generation. If function, then it must accept a single parameter representing the instance of the genetic algorithm. If the function returned "stop", then the run() method stops without completing the other generations. If method, then it must accept 2 parameters where the second one refers to the method's object. Added in PyGAD 2.6.0. - on_stop: Accepts a function/method to be called only once exactly before the genetic algorithm stops or when it completes all the generations. If function, then it must accept 2 parameters: the first one represents the instance of the genetic algorithm and the second one is a list of fitness values of the last population's solutions. If method, then it must accept 3 parameters where the third one refers to the method's object. Added in PyGAD 2.6.0. - - delay_after_gen: Added in PyGAD 2.4.0. It accepts a non-negative number specifying the number of seconds to wait after a generation completes and before going to the next generation. It defaults to 0.0 which means no delay after the generation. - - save_best_solutions: Added in PyGAD 2.9.0 and its type is bool. If True, then the best solution in each generation is saved into the 'best_solutions' attribute. Use this parameter with caution as it may cause memory overflow when either the number of generations or the number of genes is large. - save_solutions: Added in PyGAD 2.15.0 and its type is bool. If True, then all solutions in each generation are saved into the 'solutions' attribute. Use this parameter with caution as it may cause memory overflow when either the number of generations, number of genes, or number of solutions in population is large. - - suppress_warnings: Added in PyGAD 2.10.0 and its type is bool. If True, then no warning messages will be displayed. It defaults to False. 
- - allow_duplicate_genes: Added in PyGAD 2.13.0. If True, then a solution/chromosome may have duplicate gene values. If False, then each gene will have a unique value in its solution. - - stop_criteria: Added in PyGAD 2.15.0. It is assigned to some criteria to stop the evolution if at least one criterion holds. - - parallel_processing: Added in PyGAD 2.17.0. Defaults to `None` which means no parallel processing is used. If a positive integer is assigned, it specifies the number of threads to be used. If a list or a tuple of exactly 2 elements is assigned, then: 1) The first element can be either "process" or "thread" to specify whether processes or threads are used, respectively. 2) The second element can be: 1) A positive integer to select the maximum number of processes or threads to be used. 2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'. 3) None to use the default value as calculated by the concurrent.futures module. - - random_seed: Added in PyGAD 2.18.0. It defines the random seed to be used by the random function generators (we use random functions in the NumPy and random modules). This helps to reproduce the same results by setting the same random seed. - - logger: Added in PyGAD 2.20.0. It accepts a logger object of the 'logging.Logger' class to log the messages. If no logger is passed, then a default logger is created to log/print the messages to the console exactly like using the 'print()' function. - """ - - # If no logger is passed, then create a logger that logs only the messages to the console. - if logger is None: - # Create a logger named with the module name. - logger = logging.getLogger(__name__) - # Set the logger log level to 'DEBUG' to log all kinds of messages. - logger.setLevel(logging.DEBUG) - - # Clear any attached handlers to the logger from the previous runs. - # If the handlers are not cleared, then the new handler will be appended to the list of handlers. 
- # This makes the single log message be repeated according to the length of the list of handlers. - logger.handlers.clear() - - # Create the handlers. - stream_handler = logging.StreamHandler() - # Set the handler log level to 'DEBUG' to log all kinds of messages received from the logger. - stream_handler.setLevel(logging.DEBUG) - - # Create the formatter that just includes the log message. - formatter = logging.Formatter('%(message)s') - - # Add the formatter to the handler. - stream_handler.setFormatter(formatter) - - # Add the handler to the logger. - logger.addHandler(stream_handler) - else: - # Validate that the passed logger is of type 'logging.Logger'. - if isinstance(logger, logging.Logger): - pass - else: - raise TypeError("The expected type of the 'logger' parameter is 'logging.Logger' but {logger_type} found.".format(logger_type=type(logger))) - - # Create the 'self.logger' attribute to hold the logger. - # Instead of using 'print()', use 'self.logger.info()' - self.logger = logger - - self.random_seed = random_seed - if random_seed is None: - pass - else: - numpy.random.seed(self.random_seed) - random.seed(self.random_seed) - - # If suppress_warnings is bool and its valud is False, then print warning messages. 
- if type(suppress_warnings) is bool: - self.suppress_warnings = suppress_warnings - else: - self.valid_parameters = False - self.logger.error("The expected type of the 'suppress_warnings' parameter is bool but {suppress_warnings_type} found.".format(suppress_warnings_type=type(suppress_warnings))) - raise TypeError("The expected type of the 'suppress_warnings' parameter is bool but {suppress_warnings_type} found.".format(suppress_warnings_type=type(suppress_warnings))) - - # Validating mutation_by_replacement - if not (type(mutation_by_replacement) is bool): - self.valid_parameters = False - self.logger.error("The expected type of the 'mutation_by_replacement' parameter is bool but {mutation_by_replacement_type} found.".format(mutation_by_replacement_type=type(mutation_by_replacement))) - raise TypeError("The expected type of the 'mutation_by_replacement' parameter is bool but {mutation_by_replacement_type} found.".format(mutation_by_replacement_type=type(mutation_by_replacement))) - - self.mutation_by_replacement = mutation_by_replacement - - # Validate gene_space - self.gene_space_nested = False - if type(gene_space) is type(None): - pass - elif type(gene_space) in [list, tuple, range, numpy.ndarray]: - if len(gene_space) == 0: - self.valid_parameters = False - self.logger.error("'gene_space' cannot be empty (i.e. its length must be >= 0).") - raise ValueError("'gene_space' cannot be empty (i.e. its length must be >= 0).") - else: - for index, el in enumerate(gene_space): - if type(el) in [list, tuple, range, numpy.ndarray]: - if len(el) == 0: - self.valid_parameters = False - self.logger.error("The element indexed {index} of 'gene_space' with type {el_type} cannot be empty (i.e. its length must be >= 0).".format(index=index, el_type=type(el))) - raise ValueError("The element indexed {index} of 'gene_space' with type {el_type} cannot be empty (i.e. 
its length must be >= 0).".format(index=index, el_type=type(el))) - else: - for val in el: - if not (type(val) in [type(None)] + GA.supported_int_float_types): - self.logger.error("All values in the sublists inside the 'gene_space' attribute must be numeric of type int/float/None but ({val}) of type {typ} found.".format(val=val, typ=type(val))) - raise TypeError("All values in the sublists inside the 'gene_space' attribute must be numeric of type int/float/None but ({val}) of type {typ} found.".format(val=val, typ=type(val))) - self.gene_space_nested = True - elif type(el) == type(None): - pass - # self.gene_space_nested = True - elif type(el) is dict: - if len(el.items()) == 2: - if ('low' in el.keys()) and ('high' in el.keys()): - pass - else: - self.valid_parameters = False - self.logger.error("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) - raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) - elif len(el.items()) == 3: - if ('low' in el.keys()) and ('high' in el.keys()) and ('step' in el.keys()): - pass - else: - self.valid_parameters = False - self.logger.error("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) - raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it can have the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=el.keys())) - else: - self.valid_parameters = False - self.logger.error("When an element in the 
'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(el.items()))) - raise ValueError("When an element in the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(el.items()))) - self.gene_space_nested = True - elif not (type(el) in GA.supported_int_float_types): - self.valid_parameters = False - self.logger.error("Unexpected type {el_type} for the element indexed {index} of 'gene_space'. The accepted types are list/tuple/range/numpy.ndarray of numbers, a single number (int/float), or None.".format(index=index, el_type=type(el))) - raise TypeError("Unexpected type {el_type} for the element indexed {index} of 'gene_space'. The accepted types are list/tuple/range/numpy.ndarray of numbers, a single number (int/float), or None.".format(index=index, el_type=type(el))) - - elif type(gene_space) is dict: - if len(gene_space.items()) == 2: - if ('low' in gene_space.keys()) and ('high' in gene_space.keys()): - pass - else: - self.valid_parameters = False - self.logger.error("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) - raise ValueError("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) - elif len(gene_space.items()) == 3: - if ('low' in gene_space.keys()) and ('high' in gene_space.keys()) and ('step' in gene_space.keys()): - pass - else: - self.valid_parameters = False - self.logger.error("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: 
{gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) - raise ValueError("When the 'gene_space' parameter is of type dict, then it can have only the keys 'low', 'high', and 'step' (optional) but the following keys found: {gene_space_dict_keys}".format(gene_space_dict_keys=gene_space.keys())) - else: - self.valid_parameters = False - self.logger.error("When the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(gene_space.items()))) - raise ValueError("When the 'gene_space' parameter is of type dict, then it must have only 2 items but ({num_items}) items found.".format(num_items=len(gene_space.items()))) - - else: - self.valid_parameters = False - self.logger.error("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) - raise TypeError("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) - - self.gene_space = gene_space - - # Validate init_range_low and init_range_high - if type(init_range_low) in GA.supported_int_float_types: - if type(init_range_high) in GA.supported_int_float_types: - self.init_range_low = init_range_low - self.init_range_high = init_range_high - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'init_range_high' parameter must be either integer or floating-point number but the value ({init_range_high_value}) of type {init_range_high_type} found.".format(init_range_high_value=init_range_high, init_range_high_type=type(init_range_high))) - raise ValueError("The value passed to the 'init_range_high' parameter must be either integer or floating-point number but the value ({init_range_high_value}) of type {init_range_high_type} found.".format(init_range_high_value=init_range_high, init_range_high_type=type(init_range_high))) - else: - 
self.valid_parameters = False - self.logger.error("The value passed to the 'init_range_low' parameter must be either integer or floating-point number but the value ({init_range_low_value}) of type {init_range_low_type} found.".format(init_range_low_value=init_range_low, init_range_low_type=type(init_range_low))) - raise ValueError("The value passed to the 'init_range_low' parameter must be either integer or floating-point number but the value ({init_range_low_value}) of type {init_range_low_type} found.".format(init_range_low_value=init_range_low, init_range_low_type=type(init_range_low))) - - # Validate random_mutation_min_val and random_mutation_max_val - if type(random_mutation_min_val) in GA.supported_int_float_types: - if type(random_mutation_max_val) in GA.supported_int_float_types: - if random_mutation_min_val == random_mutation_max_val: - if not self.suppress_warnings: warnings.warn("The values of the 2 parameters 'random_mutation_min_val' and 'random_mutation_max_val' are equal and this causes a fixed change to all genes.") - else: - self.valid_parameters = False - self.logger.error("The expected type of the 'random_mutation_max_val' parameter is numeric but {random_mutation_max_val_type} found.".format(random_mutation_max_val_type=type(random_mutation_max_val))) - raise TypeError("The expected type of the 'random_mutation_max_val' parameter is numeric but {random_mutation_max_val_type} found.".format(random_mutation_max_val_type=type(random_mutation_max_val))) - else: - self.valid_parameters = False - self.logger.error("The expected type of the 'random_mutation_min_val' parameter is numeric but {random_mutation_min_val_type} found.".format(random_mutation_min_val_type=type(random_mutation_min_val))) - raise TypeError("The expected type of the 'random_mutation_min_val' parameter is numeric but {random_mutation_min_val_type} found.".format(random_mutation_min_val_type=type(random_mutation_min_val))) - self.random_mutation_min_val = random_mutation_min_val - 
self.random_mutation_max_val = random_mutation_max_val - - # Validate gene_type - if gene_type in GA.supported_int_float_types: - self.gene_type = [gene_type, None] - self.gene_type_single = True - # A single data type of float with precision. - elif len(gene_type) == 2 and gene_type[0] in GA.supported_float_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): - self.gene_type = gene_type - self.gene_type_single = True - elif type(gene_type) in [list, tuple, numpy.ndarray]: - if num_genes is None: - if initial_population is None: - self.valid_parameters = False - self.logger.error("When the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.") - raise TypeError("When the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.") - elif not len(gene_type) == len(initial_population[0]): - self.valid_parameters = False - self.logger.error("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the number of genes parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != number of genes ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=len(initial_population[0]))) - raise ValueError("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the number of genes parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != number of genes ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=len(initial_population[0]))) - elif not len(gene_type) == num_genes: - self.valid_parameters = False - self.logger.error("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the value passed to the 'num_genes' parameter. 
Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != len(num_genes) ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=num_genes)) - raise ValueError("When the parameter 'gene_type' is nested, then it can be either [float, int] or with length equal to the value passed to the 'num_genes' parameter. Instead, value {gene_type_val} with len(gene_type) ({len_gene_type}) != len(num_genes) ({num_genes}) found.".format(gene_type_val=gene_type, len_gene_type=len(gene_type), num_genes=num_genes)) - for gene_type_idx, gene_type_val in enumerate(gene_type): - if gene_type_val in GA.supported_float_types: - # If the gene type is float and no precision is passed, set it to None. - gene_type[gene_type_idx] = [gene_type_val, None] - elif gene_type_val in GA.supported_int_types: - gene_type[gene_type_idx] = [gene_type_val, None] - elif type(gene_type_val) in [list, tuple, numpy.ndarray]: - # A float type is expected in a list/tuple/numpy.ndarray of length 2. 
- if len(gene_type_val) == 2: - if gene_type_val[0] in GA.supported_float_types: - if type(gene_type_val[1]) in GA.supported_int_types: - pass - else: - self.valid_parameters = False - self.logger.error("In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_precision_val} with type {gene_type_type}.".format(gene_type_val=gene_type_val, gene_type_precision_val=gene_type_val[1], gene_type_type=gene_type_val[0], gene_type_idx=gene_type_idx)) - raise TypeError("In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_precision_val} with type {gene_type_type}.".format(gene_type_val=gene_type_val, gene_type_precision_val=gene_type_val[1], gene_type_type=gene_type_val[0], gene_type_idx=gene_type_idx)) - else: - self.valid_parameters = False - self.logger.error("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) - raise TypeError("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) - else: - self.valid_parameters = False - self.logger.error("In the 'gene_type' parameter, a precision is specified in a list/tuple/numpy.ndarray of length 2 but value ({gene_type_val}) of type {gene_type_type} with length {gene_type_length} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx, 
gene_type_length=len(gene_type_val))) - raise ValueError("In the 'gene_type' parameter, a precision is specified in a list/tuple/numpy.ndarray of length 2 but value ({gene_type_val}) of type {gene_type_type} with length {gene_type_length} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx, gene_type_length=len(gene_type_val))) - else: - self.valid_parameters = False - self.logger.error("When a list/tuple/numpy.ndarray is assigned to the 'gene_type' parameter, then its elements must be of integer, floating-point, list, tuple, or numpy.ndarray data types but the value ({gene_type_val}) of type {gene_type_type} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx)) - raise ValueError("When a list/tuple/numpy.ndarray is assigned to the 'gene_type' parameter, then its elements must be of integer, floating-point, list, tuple, or numpy.ndarray data types but the value ({gene_type_val}) of type {gene_type_type} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx)) - self.gene_type = gene_type - self.gene_type_single = False - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) - raise ValueError("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) - - # Build the initial population - if initial_population is None: - if (sol_per_pop is None) or (num_genes is None): - self.valid_parameters = False - 
self.logger.error("Error creating the initial population:\n\nWhen the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.\nThere are 2 options to prepare the initial population:\n1) Assinging the initial population to the 'initial_population' parameter. In this case, the values of the 2 parameters sol_per_pop and num_genes will be deduced.\n2) Assign integer values to the 'sol_per_pop' and 'num_genes' parameters so that PyGAD can create the initial population automatically.") - raise TypeError("Error creating the initial population:\n\nWhen the parameter 'initial_population' is None, then the 2 parameters 'sol_per_pop' and 'num_genes' cannot be None too.\nThere are 2 options to prepare the initial population:\n1) Assinging the initial population to the 'initial_population' parameter. In this case, the values of the 2 parameters sol_per_pop and num_genes will be deduced.\n2) Assign integer values to the 'sol_per_pop' and 'num_genes' parameters so that PyGAD can create the initial population automatically.") - elif (type(sol_per_pop) is int) and (type(num_genes) is int): - # Validating the number of solutions in the population (sol_per_pop) - if sol_per_pop <= 0: - self.valid_parameters = False - self.logger.error("The number of solutions in the population (sol_per_pop) must be > 0 but ({sol_per_pop}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(sol_per_pop=sol_per_pop)) - raise ValueError("The number of solutions in the population (sol_per_pop) must be > 0 but ({sol_per_pop}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(sol_per_pop=sol_per_pop)) - # Validating the number of gene. 
- if (num_genes <= 0): - self.valid_parameters = False - self.logger.error("The number of genes cannot be <= 0 but ({num_genes}) found.\n".format(num_genes=num_genes)) - raise ValueError("The number of genes cannot be <= 0 but ({num_genes}) found.\n".format(num_genes=num_genes)) - # When initial_population=None and the 2 parameters sol_per_pop and num_genes have valid integer values, then the initial population is created. - # Inside the initialize_population() method, the initial_population attribute is assigned to keep the initial population accessible. - self.num_genes = num_genes # Number of genes in the solution. - - # In case the 'gene_space' parameter is nested, then make sure the number of its elements equals to the number of genes. - if self.gene_space_nested: - if len(gene_space) != self.num_genes: - self.valid_parameters = False - self.logger.error("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({num_genes})".format(len_gene_space=len(gene_space), num_genes=self.num_genes)) - raise ValueError("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({num_genes})".format(len_gene_space=len(gene_space), num_genes=self.num_genes)) - - self.sol_per_pop = sol_per_pop # Number of solutions in the population. 
- self.initialize_population(self.init_range_low, - self.init_range_high, - allow_duplicate_genes, - True, - self.gene_type) - else: - self.valid_parameters = False - self.logger.error("The expected type of both the sol_per_pop and num_genes parameters is int but {sol_per_pop_type} and {num_genes_type} found.".format(sol_per_pop_type=type(sol_per_pop), num_genes_type=type(num_genes))) - raise TypeError("The expected type of both the sol_per_pop and num_genes parameters is int but {sol_per_pop_type} and {num_genes_type} found.".format(sol_per_pop_type=type(sol_per_pop), num_genes_type=type(num_genes))) - elif not type(initial_population) in [list, tuple, numpy.ndarray]: - self.valid_parameters = False - self.logger.error("The value assigned to the 'initial_population' parameter is expected to by of type list, tuple, or ndarray but {initial_population_type} found.".format(initial_population_type=type(initial_population))) - raise TypeError("The value assigned to the 'initial_population' parameter is expected to by of type list, tuple, or ndarray but {initial_population_type} found.".format(initial_population_type=type(initial_population))) - elif numpy.array(initial_population).ndim != 2: - self.valid_parameters = False - self.logger.error("A 2D list is expected to the initail_population parameter but a ({initial_population_ndim}-D) list found.".format(initial_population_ndim=numpy.array(initial_population).ndim)) - raise ValueError("A 2D list is expected to the initail_population parameter but a ({initial_population_ndim}-D) list found.".format(initial_population_ndim=numpy.array(initial_population).ndim)) - else: - # Validate the type of each value in the 'initial_population' parameter. 
- for row_idx in range(len(initial_population)): - for col_idx in range(len(initial_population[0])): - if type(initial_population[row_idx][col_idx]) in GA.supported_int_float_types: - pass - else: - self.valid_parameters = False - self.logger.error("The values in the initial population can be integers or floats but the value ({value}) of type {value_type} found.".format(value=initial_population[row_idx][col_idx], value_type=type(initial_population[row_idx][col_idx]))) - raise TypeError("The values in the initial population can be integers or floats but the value ({value}) of type {value_type} found.".format(value=initial_population[row_idx][col_idx], value_type=type(initial_population[row_idx][col_idx]))) - - # Forcing the initial_population array to have the data type assigned to the gene_type parameter. - if self.gene_type_single == True: - if self.gene_type[1] == None: - self.initial_population = numpy.array(initial_population, dtype=self.gene_type[0]) - else: - self.initial_population = numpy.round(numpy.array(initial_population, dtype=self.gene_type[0]), self.gene_type[1]) - else: - initial_population = numpy.array(initial_population) - self.initial_population = numpy.zeros(shape=(initial_population.shape[0], initial_population.shape[1]), dtype=object) - for gene_idx in range(initial_population.shape[1]): - if self.gene_type[gene_idx][1] is None: - self.initial_population[:, gene_idx] = numpy.asarray(initial_population[:, gene_idx], - dtype=self.gene_type[gene_idx][0]) - else: - self.initial_population[:, gene_idx] = numpy.round(numpy.asarray(initial_population[:, gene_idx], - dtype=self.gene_type[gene_idx][0]), - self.gene_type[gene_idx][1]) - - self.population = self.initial_population.copy() # A NumPy array holding the initial population. - self.num_genes = self.initial_population.shape[1] # Number of genes in the solution. - self.sol_per_pop = self.initial_population.shape[0] # Number of solutions in the population. 
- self.pop_size = (self.sol_per_pop,self.num_genes) # The population size. - - # Round initial_population and population - self.initial_population = self.round_genes(self.initial_population) - self.population = self.round_genes(self.population) - - # In case the 'gene_space' parameter is nested, then make sure the number of its elements equals to the number of genes. - if self.gene_space_nested: - if len(gene_space) != self.num_genes: - self.valid_parameters = False - self.logger.error("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({len_num_genes})".format(len_gene_space=len(gene_space), len_num_genes=self.num_genes)) - raise ValueError("When the parameter 'gene_space' is nested, then its length must be equal to the value passed to the 'num_genes' parameter. Instead, length of gene_space ({len_gene_space}) != num_genes ({len_num_genes})".format(len_gene_space=len(gene_space), len_num_genes=self.num_genes)) - - # Validating the number of parents to be selected for mating (num_parents_mating) - if num_parents_mating <= 0: - self.valid_parameters = False - self.logger.error("The number of parents mating (num_parents_mating) parameter must be > 0 but ({num_parents_mating}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(num_parents_mating=num_parents_mating)) - raise ValueError("The number of parents mating (num_parents_mating) parameter must be > 0 but ({num_parents_mating}) found. \nThe following parameters must be > 0: \n1) Population size (i.e. 
number of solutions per population) (sol_per_pop).\n2) Number of selected parents in the mating pool (num_parents_mating).\n".format(num_parents_mating=num_parents_mating)) - - # Validating the number of parents to be selected for mating: num_parents_mating - if (num_parents_mating > self.sol_per_pop): - self.valid_parameters = False - self.logger.error("The number of parents to select for mating ({num_parents_mating}) cannot be greater than the number of solutions in the population ({sol_per_pop}) (i.e., num_parents_mating must always be <= sol_per_pop).\n".format(num_parents_mating=num_parents_mating, sol_per_pop=self.sol_per_pop)) - raise ValueError("The number of parents to select for mating ({num_parents_mating}) cannot be greater than the number of solutions in the population ({sol_per_pop}) (i.e., num_parents_mating must always be <= sol_per_pop).\n".format(num_parents_mating=num_parents_mating, sol_per_pop=self.sol_per_pop)) - - self.num_parents_mating = num_parents_mating - - # crossover: Refers to the method that applies the crossover operator based on the selected type of crossover in the crossover_type property. - # Validating the crossover type: crossover_type - if (crossover_type is None): - self.crossover = None - elif inspect.ismethod(crossover_type): - # Check if the crossover_type is a method that accepts 4 paramaters. - if (crossover_type.__code__.co_argcount == 4): - # The crossover method assigned to the crossover_type parameter is validated. 
- self.crossover = crossover_type - else: - self.valid_parameters = False - self.logger.error("When 'crossover_type' is assigned to a method, then this crossover method must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The selected parents.\n3) The size of the offspring to be produced.\n4) The instance from the pygad.GA class.\n\nThe passed crossover method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) - raise ValueError("When 'crossover_type' is assigned to a method, then this crossover method must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The selected parents.\n3) The size of the offspring to be produced.\n4) The instance from the pygad.GA class.\n\nThe passed crossover method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) - elif callable(crossover_type): - # Check if the crossover_type is a function that accepts 2 paramaters. - if (crossover_type.__code__.co_argcount == 3): - # The crossover function assigned to the crossover_type parameter is validated. 
- self.crossover = crossover_type - else: - self.valid_parameters = False - self.logger.error("When 'crossover_type' is assigned to a function, then this crossover function must accept 3 parameters:\n1) The selected parents.\n2) The size of the offspring to be produced.3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed crossover function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) - raise ValueError("When 'crossover_type' is assigned to a function, then this crossover function must accept 3 parameters:\n1) The selected parents.\n2) The size of the offspring to be produced.3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed crossover function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=crossover_type.__code__.co_name, argcount=crossover_type.__code__.co_argcount)) - elif not (type(crossover_type) is str): - self.valid_parameters = False - self.logger.error("The expected type of the 'crossover_type' parameter is either callable or str but {crossover_type} found.".format(crossover_type=type(crossover_type))) - raise TypeError("The expected type of the 'crossover_type' parameter is either callable or str but {crossover_type} found.".format(crossover_type=type(crossover_type))) - else: # type crossover_type is str - crossover_type = crossover_type.lower() - if (crossover_type == "single_point"): - self.crossover = self.single_point_crossover - elif (crossover_type == "two_points"): - self.crossover = self.two_points_crossover - elif (crossover_type == "uniform"): - self.crossover = self.uniform_crossover - elif (crossover_type == "scattered"): - self.crossover = self.scattered_crossover - else: - self.valid_parameters = False - self.logger.error("Undefined crossover type. 
\nThe assigned value to the crossover_type ({crossover_type}) parameter does not refer to one of the supported crossover types which are: \n-single_point (for single point crossover)\n-two_points (for two points crossover)\n-uniform (for uniform crossover)\n-scattered (for scattered crossover).\n".format(crossover_type=crossover_type)) - raise TypeError("Undefined crossover type. \nThe assigned value to the crossover_type ({crossover_type}) parameter does not refer to one of the supported crossover types which are: \n-single_point (for single point crossover)\n-two_points (for two points crossover)\n-uniform (for uniform crossover)\n-scattered (for scattered crossover).\n".format(crossover_type=crossover_type)) - - self.crossover_type = crossover_type - - # Calculate the value of crossover_probability - if crossover_probability is None: - self.crossover_probability = None - elif type(crossover_probability) in GA.supported_int_float_types: - if crossover_probability >= 0 and crossover_probability <= 1: - self.crossover_probability = crossover_probability - else: - self.valid_parameters = False - self.logger.error("The value assigned to the 'crossover_probability' parameter must be between 0 and 1 inclusive but ({crossover_probability_value}) found.".format(crossover_probability_value=crossover_probability)) - raise ValueError("The value assigned to the 'crossover_probability' parameter must be between 0 and 1 inclusive but ({crossover_probability_value}) found.".format(crossover_probability_value=crossover_probability)) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for the 'crossover_probability' parameter. Float is expected but ({crossover_probability_value}) of type {crossover_probability_type} found.".format(crossover_probability_value=crossover_probability, crossover_probability_type=type(crossover_probability))) - raise TypeError("Unexpected type for the 'crossover_probability' parameter. 
Float is expected but ({crossover_probability_value}) of type {crossover_probability_type} found.".format(crossover_probability_value=crossover_probability, crossover_probability_type=type(crossover_probability))) - - # mutation: Refers to the method that applies the mutation operator based on the selected type of mutation in the mutation_type property. - # Validating the mutation type: mutation_type - # "adaptive" mutation is supported starting from PyGAD 2.10.0 - if mutation_type is None: - self.mutation = None - elif inspect.ismethod(mutation_type): - # Check if the mutation_type is a method that accepts 3 paramater. - if (mutation_type.__code__.co_argcount == 3): - # The mutation method assigned to the mutation_type parameter is validated. - self.mutation = mutation_type - else: - self.valid_parameters = False - self.logger.error("When 'mutation_type' is assigned to a method, then it must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The offspring to be mutated.\n3) The instance from the pygad.GA class.\n\nThe passed mutation method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) - raise ValueError("When 'mutation_type' is assigned to a method, then it must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The offspring to be mutated.\n3) The instance from the pygad.GA class.\n\nThe passed mutation method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) - elif callable(mutation_type): - # Check if the mutation_type is a function that accepts 2 paramater. - if (mutation_type.__code__.co_argcount == 2): - # The mutation function assigned to the mutation_type parameter is validated. 
- self.mutation = mutation_type - else: - self.valid_parameters = False - self.logger.error("When 'mutation_type' is assigned to a function, then this mutation function must accept 2 parameters:\n1) The offspring to be mutated.\n2) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed mutation function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) - raise ValueError("When 'mutation_type' is assigned to a function, then this mutation function must accept 2 parameters:\n1) The offspring to be mutated.\n2) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed mutation function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=mutation_type.__code__.co_name, argcount=mutation_type.__code__.co_argcount)) - elif not (type(mutation_type) is str): - self.valid_parameters = False - self.logger.error("The expected type of the 'mutation_type' parameter is either callable or str but {mutation_type} found.".format(mutation_type=type(mutation_type))) - raise TypeError("The expected type of the 'mutation_type' parameter is either callable or str but {mutation_type} found.".format(mutation_type=type(mutation_type))) - else: # type mutation_type is str - mutation_type = mutation_type.lower() - if (mutation_type == "random"): - self.mutation = self.random_mutation - elif (mutation_type == "swap"): - self.mutation = self.swap_mutation - elif (mutation_type == "scramble"): - self.mutation = self.scramble_mutation - elif (mutation_type == "inversion"): - self.mutation = self.inversion_mutation - elif (mutation_type == "adaptive"): - self.mutation = self.adaptive_mutation - else: - self.valid_parameters = False - self.logger.error("Undefined mutation type. 
\nThe assigned string value to the 'mutation_type' parameter ({mutation_type}) does not refer to one of the supported mutation types which are: \n-random (for random mutation)\n-swap (for swap mutation)\n-inversion (for inversion mutation)\n-scramble (for scramble mutation)\n-adaptive (for adaptive mutation).\n".format(mutation_type=mutation_type)) - raise TypeError("Undefined mutation type. \nThe assigned string value to the 'mutation_type' parameter ({mutation_type}) does not refer to one of the supported mutation types which are: \n-random (for random mutation)\n-swap (for swap mutation)\n-inversion (for inversion mutation)\n-scramble (for scramble mutation)\n-adaptive (for adaptive mutation).\n".format(mutation_type=mutation_type)) - - self.mutation_type = mutation_type - - # Calculate the value of mutation_probability - if not (self.mutation_type is None): - if mutation_probability is None: - self.mutation_probability = None - elif (mutation_type != "adaptive"): - # Mutation probability is fixed not adaptive. - if type(mutation_probability) in GA.supported_int_float_types: - if mutation_probability >= 0 and mutation_probability <= 1: - self.mutation_probability = mutation_probability - else: - self.valid_parameters = False - self.logger.error("The value assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=mutation_probability)) - raise ValueError("The value assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=mutation_probability)) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for the 'mutation_probability' parameter. 
A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) - raise TypeError("Unexpected type for the 'mutation_probability' parameter. A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) - else: - # Mutation probability is adaptive not fixed. - if type(mutation_probability) in [list, tuple, numpy.ndarray]: - if len(mutation_probability) == 2: - for el in mutation_probability: - if type(el) in GA.supported_int_float_types: - if el >= 0 and el <= 1: - pass - else: - self.valid_parameters = False - self.logger.error("The values assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=el)) - raise ValueError("The values assigned to the 'mutation_probability' parameter must be between 0 and 1 inclusive but ({mutation_probability_value}) found.".format(mutation_probability_value=el)) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for a value assigned to the 'mutation_probability' parameter. A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=el, mutation_probability_type=type(el))) - raise TypeError("Unexpected type for a value assigned to the 'mutation_probability' parameter. 
A numeric value is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=el, mutation_probability_type=type(el))) - if mutation_probability[0] < mutation_probability[1]: - if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_probability' parameter is {first_el} which is smaller than the second element {second_el}. This means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions. Please make the first element higher than the second element.".format(first_el=mutation_probability[0], second_el=mutation_probability[1])) - self.mutation_probability = mutation_probability - else: - self.valid_parameters = False - self.logger.error("When mutation_type='adaptive', then the 'mutation_probability' parameter must have only 2 elements but ({mutation_probability_length}) element(s) found.".format(mutation_probability_length=len(mutation_probability))) - raise ValueError("When mutation_type='adaptive', then the 'mutation_probability' parameter must have only 2 elements but ({mutation_probability_length}) element(s) found.".format(mutation_probability_length=len(mutation_probability))) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for the 'mutation_probability' parameter. When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) - raise TypeError("Unexpected type for the 'mutation_probability' parameter. 
When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_probability_value}) of type {mutation_probability_type} found.".format(mutation_probability_value=mutation_probability, mutation_probability_type=type(mutation_probability))) - else: - pass - - # Calculate the value of mutation_num_genes - if not (self.mutation_type is None): - if mutation_num_genes is None: - # The mutation_num_genes parameter does not exist. Checking whether adaptive mutation is used. - if (mutation_type != "adaptive"): - # The percent of genes to mutate is fixed not adaptive. - if mutation_percent_genes == 'default'.lower(): - mutation_percent_genes = 10 - # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. - mutation_num_genes = numpy.uint32((mutation_percent_genes*self.num_genes)/100) - # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. - if mutation_num_genes == 0: - if self.mutation_probability is None: - if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate (mutation_percent_genes={mutation_percent}) resutled in selecting ({mutation_num}) genes. 
The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes, mutation_num=mutation_num_genes)) - mutation_num_genes = 1 - - elif type(mutation_percent_genes) in GA.supported_int_float_types: - if (mutation_percent_genes <= 0 or mutation_percent_genes > 100): - self.valid_parameters = False - self.logger.error("The percentage of selected genes for mutation (mutation_percent_genes) must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) - raise ValueError("The percentage of selected genes for mutation (mutation_percent_genes) must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) - else: - # If mutation_percent_genes equals the string "default", then it is replaced by the numeric value 10. - if mutation_percent_genes == 'default'.lower(): - mutation_percent_genes = 10 - - # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. - mutation_num_genes = numpy.uint32((mutation_percent_genes*self.num_genes)/100) - # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. - if mutation_num_genes == 0: - if self.mutation_probability is None: - if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate (mutation_percent_genes={mutation_percent}) resutled in selecting ({mutation_num}) genes. 
The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes, mutation_num=mutation_num_genes)) - mutation_num_genes = 1 - else: - self.valid_parameters = False - self.logger.error("Unexpected value or type of the 'mutation_percent_genes' parameter. It only accepts the string 'default' or a numeric value but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=mutation_percent_genes, mutation_percent_genes_type=type(mutation_percent_genes))) - raise TypeError("Unexpected value or type of the 'mutation_percent_genes' parameter. It only accepts the string 'default' or a numeric value but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=mutation_percent_genes, mutation_percent_genes_type=type(mutation_percent_genes))) - else: - # The percent of genes to mutate is adaptive not fixed. - if type(mutation_percent_genes) in [list, tuple, numpy.ndarray]: - if len(mutation_percent_genes) == 2: - mutation_num_genes = numpy.zeros_like(mutation_percent_genes, dtype=numpy.uint32) - for idx, el in enumerate(mutation_percent_genes): - if type(el) in GA.supported_int_float_types: - if (el <= 0 or el > 100): - self.valid_parameters = False - self.logger.error("The values assigned to the 'mutation_percent_genes' must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) - raise ValueError("The values assigned to the 'mutation_percent_genes' must be > 0 and <= 100 but ({mutation_percent_genes}) found.\n".format(mutation_percent_genes=mutation_percent_genes)) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for a value assigned to the 'mutation_percent_genes' parameter. 
An integer value is expected but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=el, mutation_percent_genes_type=type(el))) - raise TypeError("Unexpected type for a value assigned to the 'mutation_percent_genes' parameter. An integer value is expected but ({mutation_percent_genes_value}) of type {mutation_percent_genes_type} found.".format(mutation_percent_genes_value=el, mutation_percent_genes_type=type(el))) - # At this point of the loop, the current value assigned to the parameter 'mutation_percent_genes' is validated. - # Based on the mutation percentage in the 'mutation_percent_genes' parameter, the number of genes to mutate is calculated. - mutation_num_genes[idx] = numpy.uint32((mutation_percent_genes[idx]*self.num_genes)/100) - # Based on the mutation percentage of genes, if the number of selected genes for mutation is less than the least possible value which is 1, then the number will be set to 1. - if mutation_num_genes[idx] == 0: - if not self.suppress_warnings: warnings.warn("The percentage of genes to mutate ({mutation_percent}) resutled in selecting ({mutation_num}) genes. The number of genes to mutate is set to 1 (mutation_num_genes=1).\nIf you do not want to mutate any gene, please set mutation_type=None.".format(mutation_percent=mutation_percent_genes[idx], mutation_num=mutation_num_genes[idx])) - mutation_num_genes[idx] = 1 - if mutation_percent_genes[0] < mutation_percent_genes[1]: - if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_percent_genes' parameter is ({first_el}) which is smaller than the second element ({second_el}).\nThis means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. 
This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions.\nPlease make the first element higher than the second element.".format(first_el=mutation_percent_genes[0], second_el=mutation_percent_genes[1])) - # At this point outside the loop, all values of the parameter 'mutation_percent_genes' are validated. Eveyrthing is OK. - else: - self.valid_parameters = False - self.logger.error("When mutation_type='adaptive', then the 'mutation_percent_genes' parameter must have only 2 elements but ({mutation_percent_genes_length}) element(s) found.".format(mutation_percent_genes_length=len(mutation_percent_genes))) - raise ValueError("When mutation_type='adaptive', then the 'mutation_percent_genes' parameter must have only 2 elements but ({mutation_percent_genes_length}) element(s) found.".format(mutation_percent_genes_length=len(mutation_percent_genes))) - else: - if self.mutation_probability is None: - self.valid_parameters = False - self.logger.error("Unexpected type of the 'mutation_percent_genes' parameter. When mutation_type='adaptive', then the 'mutation_percent_genes' parameter should exist and assigned a list/tuple/numpy.ndarray with 2 values but ({mutation_percent_genes_value}) found.".format(mutation_percent_genes_value=mutation_percent_genes)) - raise TypeError("Unexpected type of the 'mutation_percent_genes' parameter. When mutation_type='adaptive', then the 'mutation_percent_genes' parameter should exist and assigned a list/tuple/numpy.ndarray with 2 values but ({mutation_percent_genes_value}) found.".format(mutation_percent_genes_value=mutation_percent_genes)) - # The mutation_num_genes parameter exists. Checking whether adaptive mutation is used. - elif (mutation_type != "adaptive"): - # Number of genes to mutate is fixed not adaptive. 
- if type(mutation_num_genes) in GA.supported_int_types: - if (mutation_num_genes <= 0): - self.valid_parameters = False - self.logger.error("The number of selected genes for mutation (mutation_num_genes) cannot be <= 0 but ({mutation_num_genes}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes=mutation_num_genes)) - raise ValueError("The number of selected genes for mutation (mutation_num_genes) cannot be <= 0 but ({mutation_num_genes}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes=mutation_num_genes)) - elif (mutation_num_genes > self.num_genes): - self.valid_parameters = False - self.logger.error("The number of selected genes for mutation (mutation_num_genes), which is ({mutation_num_genes}), cannot be greater than the number of genes ({num_genes}).\n".format(mutation_num_genes=mutation_num_genes, num_genes=self.num_genes)) - raise ValueError("The number of selected genes for mutation (mutation_num_genes), which is ({mutation_num_genes}), cannot be greater than the number of genes ({num_genes}).\n".format(mutation_num_genes=mutation_num_genes, num_genes=self.num_genes)) - else: - self.valid_parameters = False - self.logger.error("The 'mutation_num_genes' parameter is expected to be a positive integer but the value ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.\n".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) - raise TypeError("The 'mutation_num_genes' parameter is expected to be a positive integer but the value ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.\n".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) - else: - # Number of genes to mutate is adaptive not fixed. 
- if type(mutation_num_genes) in [list, tuple, numpy.ndarray]: - if len(mutation_num_genes) == 2: - for el in mutation_num_genes: - if type(el) in GA.supported_int_types: - if (el <= 0): - self.valid_parameters = False - self.logger.error("The values assigned to the 'mutation_num_genes' cannot be <= 0 but ({mutation_num_genes_value}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes_value=el)) - raise ValueError("The values assigned to the 'mutation_num_genes' cannot be <= 0 but ({mutation_num_genes_value}) found. If you do not want to use mutation, please set mutation_type=None\n".format(mutation_num_genes_value=el)) - elif (el > self.num_genes): - self.valid_parameters = False - self.logger.error("The values assigned to the 'mutation_num_genes' cannot be greater than the number of genes ({num_genes}) but ({mutation_num_genes_value}) found.\n".format(mutation_num_genes_value=el, num_genes=self.num_genes)) - raise ValueError("The values assigned to the 'mutation_num_genes' cannot be greater than the number of genes ({num_genes}) but ({mutation_num_genes_value}) found.\n".format(mutation_num_genes_value=el, num_genes=self.num_genes)) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for a value assigned to the 'mutation_num_genes' parameter. An integer value is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=el, mutation_num_genes_type=type(el))) - raise TypeError("Unexpected type for a value assigned to the 'mutation_num_genes' parameter. An integer value is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=el, mutation_num_genes_type=type(el))) - # At this point of the loop, the current value assigned to the parameter 'mutation_num_genes' is validated. 
- if mutation_num_genes[0] < mutation_num_genes[1]: - if not self.suppress_warnings: warnings.warn("The first element in the 'mutation_num_genes' parameter is {first_el} which is smaller than the second element {second_el}. This means the mutation rate for the high-quality solutions is higher than the mutation rate of the low-quality ones. This causes high disruption in the high qualitiy solutions while making little changes in the low quality solutions. Please make the first element higher than the second element.".format(first_el=mutation_num_genes[0], second_el=mutation_num_genes[1])) - # At this point outside the loop, all values of the parameter 'mutation_num_genes' are validated. Eveyrthing is OK. - else: - self.valid_parameters = False - self.logger.error("When mutation_type='adaptive', then the 'mutation_num_genes' parameter must have only 2 elements but ({mutation_num_genes_length}) element(s) found.".format(mutation_num_genes_length=len(mutation_num_genes))) - raise ValueError("When mutation_type='adaptive', then the 'mutation_num_genes' parameter must have only 2 elements but ({mutation_num_genes_length}) element(s) found.".format(mutation_num_genes_length=len(mutation_num_genes))) - else: - self.valid_parameters = False - self.logger.error("Unexpected type for the 'mutation_num_genes' parameter. When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) - raise TypeError("Unexpected type for the 'mutation_num_genes' parameter. 
When mutation_type='adaptive', then list/tuple/numpy.ndarray is expected but ({mutation_num_genes_value}) of type {mutation_num_genes_type} found.".format(mutation_num_genes_value=mutation_num_genes, mutation_num_genes_type=type(mutation_num_genes))) - else: - pass - - # Validating mutation_by_replacement and mutation_type - if self.mutation_type != "random" and self.mutation_by_replacement: - if not self.suppress_warnings: warnings.warn("The mutation_by_replacement parameter is set to True while the mutation_type parameter is not set to random but ({mut_type}). Note that the mutation_by_replacement parameter has an effect only when mutation_type='random'.".format(mut_type=mutation_type)) - - # Check if crossover and mutation are both disabled. - if (self.mutation_type is None) and (self.crossover_type is None): - if not self.suppress_warnings: warnings.warn("The 2 parameters mutation_type and crossover_type are None. This disables any type of evolution the genetic algorithm can make. As a result, the genetic algorithm cannot find a better solution that the best solution in the initial population.") - - # select_parents: Refers to a method that selects the parents based on the parent selection type specified in the parent_selection_type attribute. - # Validating the selected type of parent selection: parent_selection_type - if inspect.ismethod(parent_selection_type): - # Check if the parent_selection_type is a method that accepts 4 paramaters. - if (parent_selection_type.__code__.co_argcount == 4): - # population: Added in PyGAD 2.16.0. It should used only to support custom parent selection functions. Otherwise, it should be left to None to retirve the population by self.population. - # The parent selection method assigned to the parent_selection_type parameter is validated. 
- self.select_parents = parent_selection_type - else: - self.valid_parameters = False - self.logger.error("When 'parent_selection_type' is assigned to a method, then it must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The fitness values of the current population.\n3) The number of parents needed.\n4) The instance from the pygad.GA class.\n\nThe passed parent selection method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) - raise ValueError("When 'parent_selection_type' is assigned to a method, then it must accept 4 parameters:\n1) Expected to be the 'self' object.\n2) The fitness values of the current population.\n3) The number of parents needed.\n4) The instance from the pygad.GA class.\n\nThe passed parent selection method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) - elif callable(parent_selection_type): - # Check if the parent_selection_type is a function that accepts 3 paramaters. - if (parent_selection_type.__code__.co_argcount == 3): - # population: Added in PyGAD 2.16.0. It should used only to support custom parent selection functions. Otherwise, it should be left to None to retirve the population by self.population. - # The parent selection function assigned to the parent_selection_type parameter is validated. 
- self.select_parents = parent_selection_type - else: - self.valid_parameters = False - self.logger.error("When 'parent_selection_type' is assigned to a user-defined function, then this parent selection function must accept 3 parameters:\n1) The fitness values of the current population.\n2) The number of parents needed.\n3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed parent selection function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) - raise ValueError("When 'parent_selection_type' is assigned to a user-defined function, then this parent selection function must accept 3 parameters:\n1) The fitness values of the current population.\n2) The number of parents needed.\n3) The instance from the pygad.GA class to retrieve any property like population, gene data type, gene space, etc.\n\nThe passed parent selection function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=parent_selection_type.__code__.co_name, argcount=parent_selection_type.__code__.co_argcount)) - elif not (type(parent_selection_type) is str): - self.valid_parameters = False - self.logger.error("The expected type of the 'parent_selection_type' parameter is either callable or str but {parent_selection_type} found.".format(parent_selection_type=type(parent_selection_type))) - raise TypeError("The expected type of the 'parent_selection_type' parameter is either callable or str but {parent_selection_type} found.".format(parent_selection_type=type(parent_selection_type))) - else: - parent_selection_type = parent_selection_type.lower() - if (parent_selection_type == "sss"): - self.select_parents = self.steady_state_selection - elif (parent_selection_type == "rws"): - self.select_parents = self.roulette_wheel_selection - elif (parent_selection_type == "sus"): - self.select_parents = 
self.stochastic_universal_selection - elif (parent_selection_type == "random"): - self.select_parents = self.random_selection - elif (parent_selection_type == "tournament"): - self.select_parents = self.tournament_selection - elif (parent_selection_type == "rank"): - self.select_parents = self.rank_selection - else: - self.valid_parameters = False - self.logger.error("Undefined parent selection type: {parent_selection_type}. \nThe assigned value to the 'parent_selection_type' parameter does not refer to one of the supported parent selection techniques which are: \n-sss (for steady state selection)\n-rws (for roulette wheel selection)\n-sus (for stochastic universal selection)\n-rank (for rank selection)\n-random (for random selection)\n-tournament (for tournament selection).\n".format(parent_selection_type=parent_selection_type)) - raise TypeError("Undefined parent selection type: {parent_selection_type}. \nThe assigned value to the 'parent_selection_type' parameter does not refer to one of the supported parent selection techniques which are: \n-sss (for steady state selection)\n-rws (for roulette wheel selection)\n-sus (for stochastic universal selection)\n-rank (for rank selection)\n-random (for random selection)\n-tournament (for tournament selection).\n".format(parent_selection_type=parent_selection_type)) - - # For tournament selection, validate the K value. 
- if(parent_selection_type == "tournament"): - if (K_tournament > self.sol_per_pop): - K_tournament = self.sol_per_pop - if not self.suppress_warnings: warnings.warn("K of the tournament selection ({K_tournament}) should not be greater than the number of solutions within the population ({sol_per_pop}).\nK will be clipped to be equal to the number of solutions in the population (sol_per_pop).\n".format(K_tournament=K_tournament, sol_per_pop=self.sol_per_pop)) - elif (K_tournament <= 0): - self.valid_parameters = False - self.logger.error("K of the tournament selection cannot be <=0 but ({K_tournament}) found.\n".format(K_tournament=K_tournament)) - raise ValueError("K of the tournament selection cannot be <=0 but ({K_tournament}) found.\n".format(K_tournament=K_tournament)) - - self.K_tournament = K_tournament - - # Validating the number of parents to keep in the next population: keep_parents - if not (type(keep_parents) in GA.supported_int_types): - self.valid_parameters = False - self.logger.error("Incorrect type of the value assigned to the keep_parents parameter. The value ({keep_parents}) of type {keep_parents_type} found but an integer is expected.".format(keep_parents=keep_parents, keep_parents_type=type(keep_parents))) - raise TypeError("Incorrect type of the value assigned to the keep_parents parameter. The value ({keep_parents}) of type {keep_parents_type} found but an integer is expected.".format(keep_parents=keep_parents, keep_parents_type=type(keep_parents))) - elif (keep_parents > self.sol_per_pop or keep_parents > self.num_parents_mating or keep_parents < -1): - self.valid_parameters = False - self.logger.error("Incorrect value to the keep_parents parameter: {keep_parents}. 
\nThe assigned value to the keep_parent parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Less than or equal to num_parents_mating\n3) Greater than or equal to -1.".format(keep_parents=keep_parents)) - raise ValueError("Incorrect value to the keep_parents parameter: {keep_parents}. \nThe assigned value to the keep_parent parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Less than or equal to num_parents_mating\n3) Greater than or equal to -1.".format(keep_parents=keep_parents)) - - self.keep_parents = keep_parents - - if parent_selection_type == "sss" and self.keep_parents == 0: - if not self.suppress_warnings: warnings.warn("The steady-state parent (sss) selection operator is used despite that no parents are kept in the next generation.") - - # Validating the number of elitism to keep in the next population: keep_elitism - if not (type(keep_elitism) in GA.supported_int_types): - self.valid_parameters = False - self.logger.error("Incorrect type of the value assigned to the keep_elitism parameter. The value ({keep_elitism}) of type {keep_elitism_type} found but an integer is expected.".format(keep_elitism=keep_elitism, keep_elitism_type=type(keep_elitism))) - raise TypeError("Incorrect type of the value assigned to the keep_elitism parameter. The value ({keep_elitism}) of type {keep_elitism_type} found but an integer is expected.".format(keep_elitism=keep_elitism, keep_elitism_type=type(keep_elitism))) - elif (keep_elitism > self.sol_per_pop or keep_elitism < 0): - self.valid_parameters = False - self.logger.error("Incorrect value to the keep_elitism parameter: {keep_elitism}. \nThe assigned value to the keep_elitism parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Greater than or equal to 0.".format(keep_elitism=keep_elitism)) - raise ValueError("Incorrect value to the keep_elitism parameter: {keep_elitism}. 
\nThe assigned value to the keep_elitism parameter must satisfy the following conditions: \n1) Less than or equal to sol_per_pop\n2) Greater than or equal to 0.".format(keep_elitism=keep_elitism)) - - self.keep_elitism = keep_elitism - - # Validate keep_parents. - if self.keep_elitism == 0: - if (self.keep_parents == -1): # Keep all parents in the next population. - self.num_offspring = self.sol_per_pop - self.num_parents_mating - elif (self.keep_parents == 0): # Keep no parents in the next population. - self.num_offspring = self.sol_per_pop - elif (self.keep_parents > 0): # Keep the specified number of parents in the next population. - self.num_offspring = self.sol_per_pop - self.keep_parents - else: - self.num_offspring = self.sol_per_pop - self.keep_elitism - - # Check if the fitness_func is a method. - # In PyGAD 2.19.0, a method can be passed to the fitness function. If function is passed, then it accepts 2 parameters. If method, then it accepts 3 parameters. - # In PyGAD 2.20.0, a new parameter is passed referring to the instance of the `pygad.GA` class. So, the function accepts 3 parameters and the method accepts 4 parameters. - if inspect.ismethod(fitness_func): - # If the fitness is calculated through a method, not a function, then there is a fourth 'self` paramaters. 
- if (fitness_func.__code__.co_argcount == 4): - self.fitness_func = fitness_func - else: - self.valid_parameters = False - self.logger.error("In PyGAD 2.20.0, if a method is used to calculate the fitness value, then it must accept 4 parameters\n1) Expected to be the 'self' object.\n2) The instance of the 'pygad.GA' class.\n3) A solution to calculate its fitness value.\n4) The solution's index within the population.\n\nThe passed fitness method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) - raise ValueError("In PyGAD 2.20.0, if a method is used to calculate the fitness value, then it must accept 4 parameters\n1) Expected to be the 'self' object.\n2) The instance of the 'pygad.GA' class.\n3) A solution to calculate its fitness value.\n4) The solution's index within the population.\n\nThe passed fitness method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) - elif callable(fitness_func): - # Check if the fitness function accepts 2 paramaters. 
- if (fitness_func.__code__.co_argcount == 3): - self.fitness_func = fitness_func - else: - self.valid_parameters = False - self.logger.error("In PyGAD 2.20.0, the fitness function must accept 3 parameters:\n1) The instance of the 'pygad.GA' class.\n2) A solution to calculate its fitness value.\n3) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) - raise ValueError("In PyGAD 2.20.0, the fitness function must accept 3 parameters:\n1) The instance of the 'pygad.GA' class.\n2) A solution to calculate its fitness value.\n3) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the fitness_func parameter is expected to be of type function but {fitness_func_type} found.".format(fitness_func_type=type(fitness_func))) - raise TypeError("The value assigned to the fitness_func parameter is expected to be of type function but {fitness_func_type} found.".format(fitness_func_type=type(fitness_func))) - - if fitness_batch_size is None: - pass - elif not (type(fitness_batch_size) in GA.supported_int_types): - self.valid_parameters = False - self.logger.error("The value assigned to the fitness_batch_size parameter is expected to be integer but the value ({fitness_batch_size}) of type {fitness_batch_size_type} found.".format(fitness_batch_size=fitness_batch_size, fitness_batch_size_type=type(fitness_batch_size))) - raise TypeError("The value assigned to the fitness_batch_size parameter is expected to be integer but the value ({fitness_batch_size}) of type {fitness_batch_size_type} found.".format(fitness_batch_size=fitness_batch_size, 
fitness_batch_size_type=type(fitness_batch_size))) - elif fitness_batch_size <= 0 or fitness_batch_size > self.sol_per_pop: - self.valid_parameters = False - self.logger.error("The value assigned to the fitness_batch_size parameter must be:\n1) Greater than 0.\n2) Less than or equal to sol_per_pop ({sol_per_pop}).\nBut the value ({fitness_batch_size}) found.".format(fitness_batch_size=fitness_batch_size, sol_per_pop=self.sol_per_pop)) - raise ValueError("The value assigned to the fitness_batch_size parameter must be:\n1) Greater than 0.\n2) Less than or equal to sol_per_pop ({sol_per_pop}).\nBut the value ({fitness_batch_size}) found.".format(fitness_batch_size=fitness_batch_size, sol_per_pop=self.sol_per_pop)) - - self.fitness_batch_size = fitness_batch_size - - # Check if the on_start exists. - if not (on_start is None): - if inspect.ismethod(on_start): - # Check if the on_start method accepts 2 paramaters. - if (on_start.__code__.co_argcount == 2): - self.on_start = on_start - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_start parameter must accept only 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) - raise ValueError("The method assigned to the on_start parameter must accept only 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) - # Check if the on_start is a function. - elif callable(on_start): - # Check if the on_start function accepts only a single paramater. 
- if (on_start.__code__.co_argcount == 1): - self.on_start = on_start - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_start parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) - raise ValueError("The function assigned to the on_start parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_start.__code__.co_name, argcount=on_start.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_start parameter is expected to be of type function but {on_start_type} found.".format(on_start_type=type(on_start))) - raise TypeError("The value assigned to the on_start parameter is expected to be of type function but {on_start_type} found.".format(on_start_type=type(on_start))) - else: - self.on_start = None - - # Check if the on_fitness exists. - if not (on_fitness is None): - # Check if the on_fitness is a method. - if inspect.ismethod(on_fitness): - # Check if the on_fitness method accepts 3 paramaters. 
- if (on_fitness.__code__.co_argcount == 3): - self.on_fitness = on_fitness - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_fitness parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) - raise ValueError("The method assigned to the on_fitness parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) - # Check if the on_fitness is a function. - elif callable(on_fitness): - # Check if the on_fitness function accepts 2 paramaters. - if (on_fitness.__code__.co_argcount == 2): - self.on_fitness = on_fitness - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_fitness parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) - raise ValueError("The function assigned to the on_fitness parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_fitness.__code__.co_name, argcount=on_fitness.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_fitness parameter is expected to be of type function but {on_fitness_type} 
found.".format(on_fitness_type=type(on_fitness))) - raise TypeError("The value assigned to the on_fitness parameter is expected to be of type function but {on_fitness_type} found.".format(on_fitness_type=type(on_fitness))) - else: - self.on_fitness = None - - # Check if the on_parents exists. - if not (on_parents is None): - # Check if the on_parents is a method. - if inspect.ismethod(on_parents): - # Check if the on_parents method accepts 3 paramaters. - if (on_parents.__code__.co_argcount == 3): - self.on_parents = on_parents - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_parents parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) - raise ValueError("The method assigned to the on_parents parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n3) The fitness values of all solutions.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) - # Check if the on_parents is a function. - elif callable(on_parents): - # Check if the on_parents function accepts 2 paramaters. 
- if (on_parents.__code__.co_argcount == 2): - self.on_parents = on_parents - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_parents parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) - raise ValueError("The function assigned to the on_parents parameter must accept 2 parameters representing the instance of the genetic algorithm and the fitness values of all solutions.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_parents.__code__.co_name, argcount=on_parents.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_parents parameter is expected to be of type function but {on_parents_type} found.".format(on_parents_type=type(on_parents))) - raise TypeError("The value assigned to the on_parents parameter is expected to be of type function but {on_parents_type} found.".format(on_parents_type=type(on_parents))) - else: - self.on_parents = None - - # Check if the on_crossover exists. - if not (on_crossover is None): - # Check if the on_crossover is a method. - if inspect.ismethod(on_crossover): - # Check if the on_crossover method accepts 3 paramaters. 
- if (on_crossover.__code__.co_argcount == 3): - self.on_crossover = on_crossover - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_crossover parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring generated using crossover.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) - raise ValueError("The method assigned to the on_crossover parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring generated using crossover.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) - # Check if the on_crossover is a function. - elif callable(on_crossover): - # Check if the on_crossover function accepts 2 paramaters. 
- if (on_crossover.__code__.co_argcount == 2): - self.on_crossover = on_crossover - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_crossover parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring generated using crossover.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) - raise ValueError("The function assigned to the on_crossover parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring generated using crossover.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_crossover.__code__.co_name, argcount=on_crossover.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_crossover parameter is expected to be of type function but {on_crossover_type} found.".format(on_crossover_type=type(on_crossover))) - raise TypeError("The value assigned to the on_crossover parameter is expected to be of type function but {on_crossover_type} found.".format(on_crossover_type=type(on_crossover))) - else: - self.on_crossover = None - - # Check if the on_mutation exists. - if not (on_mutation is None): - # Check if the on_mutation is a method. - if inspect.ismethod(on_mutation): - # Check if the on_mutation method accepts 3 paramaters. 
- if (on_mutation.__code__.co_argcount == 3): - self.on_mutation = on_mutation - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_mutation parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring after applying the mutation operation.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) - raise ValueError("The method assigned to the on_mutation parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) The offspring after applying the mutation operation.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) - # Check if the on_mutation is a function. - elif callable(on_mutation): - # Check if the on_mutation function accepts 2 paramaters. 
- if (on_mutation.__code__.co_argcount == 2): - self.on_mutation = on_mutation - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_mutation parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring after applying the mutation operation.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) - raise ValueError("The function assigned to the on_mutation parameter must accept 2 parameters representing the instance of the genetic algorithm and the offspring after applying the mutation operation.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_mutation.__code__.co_name, argcount=on_mutation.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_mutation parameter is expected to be of type function but {on_mutation_type} found.".format(on_mutation_type=type(on_mutation))) - raise TypeError("The value assigned to the on_mutation parameter is expected to be of type function but {on_mutation_type} found.".format(on_mutation_type=type(on_mutation))) - else: - self.on_mutation = None - - # Check if the on_generation exists. - if not (on_generation is None): - # Check if the on_generation is a method. - if inspect.ismethod(on_generation): - # Check if the on_generation method accepts 2 paramaters. 
- if (on_generation.__code__.co_argcount == 2): - self.on_generation = on_generation - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_generation parameter must accept 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) - raise ValueError("The method assigned to the on_generation parameter must accept 2 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) - # Check if the on_generation is a function. - elif callable(on_generation): - # Check if the on_generation function accepts only a single paramater. - if (on_generation.__code__.co_argcount == 1): - self.on_generation = on_generation - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_generation parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) - raise ValueError("The function assigned to the on_generation parameter must accept only 1 parameter representing the instance of the genetic algorithm.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_generation.__code__.co_name, argcount=on_generation.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the on_generation parameter is expected to be of type function but {on_generation_type} found.".format(on_generation_type=type(on_generation))) - raise TypeError("The value assigned 
to the on_generation parameter is expected to be of type function but {on_generation_type} found.".format(on_generation_type=type(on_generation))) - else: - self.on_generation = None - - # Check if the on_stop exists. - if not (on_stop is None): - # Check if the on_stop is a method. - if inspect.ismethod(on_stop): - # Check if the on_stop method accepts 3 paramaters. - if (on_stop.__code__.co_argcount == 3): - self.on_stop = on_stop - else: - self.valid_parameters = False - self.logger.error("The method assigned to the on_stop parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) A list of the fitness values of the solutions in the last population.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) - raise ValueError("The method assigned to the on_stop parameter must accept 3 parameters:\n1) Expected to be the 'self' object.\n2) The instance of the genetic algorithm.\n2) A list of the fitness values of the solutions in the last population.\nThe passed method named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) - # Check if the on_stop is a function. - elif callable(on_stop): - # Check if the on_stop function accepts 2 paramaters. 
- if (on_stop.__code__.co_argcount == 2): - self.on_stop = on_stop - else: - self.valid_parameters = False - self.logger.error("The function assigned to the on_stop parameter must accept 2 parameters representing the instance of the genetic algorithm and a list of the fitness values of the solutions in the last population.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) - raise ValueError("The function assigned to the on_stop parameter must accept 2 parameters representing the instance of the genetic algorithm and a list of the fitness values of the solutions in the last population.\nThe passed function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=on_stop.__code__.co_name, argcount=on_stop.__code__.co_argcount)) - else: - self.valid_parameters = False - self.logger.error("The value assigned to the 'on_stop' parameter is expected to be of type function but {on_stop_type} found.".format(on_stop_type=type(on_stop))) - raise TypeError("The value assigned to the 'on_stop' parameter is expected to be of type function but {on_stop_type} found.".format(on_stop_type=type(on_stop))) - else: - self.on_stop = None - - # Validate delay_after_gen - if type(delay_after_gen) in GA.supported_int_float_types: - if delay_after_gen >= 0.0: - self.delay_after_gen = delay_after_gen - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'delay_after_gen' parameter must be a non-negative number. The value passed is ({delay_after_gen}) of type {delay_after_gen_type}.".format(delay_after_gen=delay_after_gen, delay_after_gen_type=type(delay_after_gen))) - raise ValueError("The value passed to the 'delay_after_gen' parameter must be a non-negative number. 
The value passed is ({delay_after_gen}) of type {delay_after_gen_type}.".format(delay_after_gen=delay_after_gen, delay_after_gen_type=type(delay_after_gen))) - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'delay_after_gen' parameter must be of type int or float but {delay_after_gen_type} found.".format(delay_after_gen_type=type(delay_after_gen))) - raise TypeError("The value passed to the 'delay_after_gen' parameter must be of type int or float but {delay_after_gen_type} found.".format(delay_after_gen_type=type(delay_after_gen))) - - # Validate save_best_solutions - if type(save_best_solutions) is bool: - if save_best_solutions == True: - if not self.suppress_warnings: warnings.warn("Use the 'save_best_solutions' parameter with caution as it may cause memory overflow when either the number of generations or number of genes is large.") - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'save_best_solutions' parameter must be of type bool but {save_best_solutions_type} found.".format(save_best_solutions_type=type(save_best_solutions))) - raise TypeError("The value passed to the 'save_best_solutions' parameter must be of type bool but {save_best_solutions_type} found.".format(save_best_solutions_type=type(save_best_solutions))) - - # Validate save_solutions - if type(save_solutions) is bool: - if save_solutions == True: - if not self.suppress_warnings: warnings.warn("Use the 'save_solutions' parameter with caution as it may cause memory overflow when either the number of generations, number of genes, or number of solutions in population is large.") - else: - self.valid_parameters = False - self.logger.error("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} found.".format(save_solutions_type=type(save_solutions))) - raise TypeError("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} 
found.".format(save_solutions_type=type(save_solutions))) - - # Validate allow_duplicate_genes - if not (type(allow_duplicate_genes) is bool): - self.valid_parameters = False - self.logger.error("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) - raise TypeError("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) - - self.allow_duplicate_genes = allow_duplicate_genes - - self.stop_criteria = [] - self.supported_stop_words = ["reach", "saturate"] - if stop_criteria is None: - # None: Stop after passing through all generations. - self.stop_criteria = None - elif type(stop_criteria) is str: - # reach_{target_fitness}: Stop if the target fitness value is reached. - # saturate_{num_generations}: Stop if the fitness value does not change (saturates) for the given number of generations. 
- criterion = stop_criteria.split("_") - if len(criterion) == 2: - stop_word = criterion[0] - number = criterion[1] - - if stop_word in self.supported_stop_words: - pass - else: - self.valid_parameters = False - self.logger.error("In the 'stop_criteria' parameter, the supported stop words are '{supported_stop_words}' but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) - raise ValueError("In the 'stop_criteria' parameter, the supported stop words are '{supported_stop_words}' but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) - - if number.replace(".", "").isnumeric(): - number = float(number) - else: - self.valid_parameters = False - self.logger.error("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) - raise ValueError("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) - - self.stop_criteria.append([stop_word, number]) - - else: - self.valid_parameters = False - self.logger.error("For format of a single criterion in the 'stop_criteria' parameter is 'word_number' but '{stop_criteria}' found.".format(stop_criteria=stop_criteria)) - raise ValueError("For format of a single criterion in the 'stop_criteria' parameter is 'word_number' but '{stop_criteria}' found.".format(stop_criteria=stop_criteria)) - - elif type(stop_criteria) in [list, tuple, numpy.ndarray]: - # Remove duplicate criterira by converting the list to a set then back to a list. 
- stop_criteria = list(set(stop_criteria)) - for idx, val in enumerate(stop_criteria): - if type(val) is str: - criterion = val.split("_") - if len(criterion) == 2: - stop_word = criterion[0] - number = criterion[1] - - if stop_word in self.supported_stop_words: - pass - else: - self.valid_parameters = False - self.logger.error("In the 'stop_criteria' parameter, the supported stop words are {supported_stop_words} but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) - raise ValueError("In the 'stop_criteria' parameter, the supported stop words are {supported_stop_words} but '{stop_word}' found.".format(supported_stop_words=self.supported_stop_words, stop_word=stop_word)) - - if number.replace(".", "").isnumeric(): - number = float(number) - else: - self.valid_parameters = False - self.logger.error("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) - raise ValueError("The value following the stop word in the 'stop_criteria' parameter must be a number but the value ({stop_val}) of type {stop_val_type} found.".format(stop_val=number, stop_val_type=type(number))) - - self.stop_criteria.append([stop_word, number]) - - else: - self.valid_parameters = False - self.logger.error("The format of a single criterion in the 'stop_criteria' parameter is 'word_number' but {stop_criteria} found.".format(stop_criteria=criterion)) - raise ValueError("The format of a single criterion in the 'stop_criteria' parameter is 'word_number' but {stop_criteria} found.".format(stop_criteria=criterion)) - else: - self.valid_parameters = False - self.logger.error("When the 'stop_criteria' parameter is assigned a tuple/list/numpy.ndarray, then its elements must be strings but the value ({stop_criteria_val}) of type {stop_criteria_val_type} found at index 
{stop_criteria_val_idx}.".format(stop_criteria_val=val, stop_criteria_val_type=type(val), stop_criteria_val_idx=idx)) - raise TypeError("When the 'stop_criteria' parameter is assigned a tuple/list/numpy.ndarray, then its elements must be strings but the value ({stop_criteria_val}) of type {stop_criteria_val_type} found at index {stop_criteria_val_idx}.".format(stop_criteria_val=val, stop_criteria_val_type=type(val), stop_criteria_val_idx=idx)) - else: - self.valid_parameters = False - self.logger.error("The expected value of the 'stop_criteria' is a single string or a list/tuple/numpy.ndarray of strings but the value ({stop_criteria_val}) of type {stop_criteria_type} found.".format(stop_criteria_val=stop_criteria, stop_criteria_type=type(stop_criteria))) - raise TypeError("The expected value of the 'stop_criteria' is a single string or a list/tuple/numpy.ndarray of strings but the value ({stop_criteria_val}) of type {stop_criteria_type} found.".format(stop_criteria_val=stop_criteria, stop_criteria_type=type(stop_criteria))) - - if parallel_processing is None: - self.parallel_processing = None - elif type(parallel_processing) in GA.supported_int_types: - if parallel_processing > 0: - self.parallel_processing = ["thread", parallel_processing] - else: - self.valid_parameters = False - self.logger.error("When the 'parallel_processing' parameter is assigned an integer, then the integer must be positive but the value ({parallel_processing_value}) found.".format(parallel_processing_value=parallel_processing)) - raise ValueError("When the 'parallel_processing' parameter is assigned an integer, then the integer must be positive but the value ({parallel_processing_value}) found.".format(parallel_processing_value=parallel_processing)) - elif type(parallel_processing) in [list, tuple]: - if len(parallel_processing) == 2: - if type(parallel_processing[0]) is str: - if parallel_processing[0] in ["process", "thread"]: - if (type(parallel_processing[1]) in GA.supported_int_types 
and parallel_processing[1] > 0) or (parallel_processing[1] == 0) or (parallel_processing[1] is None): - if parallel_processing[1] == 0: - # If the number of processes/threads is 0, this means no parallel processing is used. It is equivelant to setting parallel_processing=None. - self.parallel_processing = None - else: - # Whether the second value is None or a positive integer. - self.parallel_processing = parallel_processing - else: - self.valid_parameters = False - self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the second element must be an integer but the value ({second_value}) of type {second_value_type} found.".format(second_value=parallel_processing[1], second_value_type=type(parallel_processing[1]))) - raise TypeError("When a list or tuple is assigned to the 'parallel_processing' parameter, then the second element must be an integer but the value ({second_value}) of type {second_value_type} found.".format(second_value=parallel_processing[1], second_value_type=type(parallel_processing[1]))) - else: - self.valid_parameters = False - self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the value of the first element must be either 'process' or 'thread' but the value ({first_value}) found.".format(first_value=parallel_processing[0])) - raise ValueError("When a list or tuple is assigned to the 'parallel_processing' parameter, then the value of the first element must be either 'process' or 'thread' but the value ({first_value}) found.".format(first_value=parallel_processing[0])) - else: - self.valid_parameters = False - self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then the first element must be of type 'str' but the value ({first_value}) of type {first_value_type} found.".format(first_value=parallel_processing[0], first_value_type=type(parallel_processing[0]))) - raise TypeError("When a list or tuple is assigned to the 
'parallel_processing' parameter, then the first element must be of type 'str' but the value ({first_value}) of type {first_value_type} found.".format(first_value=parallel_processing[0], first_value_type=type(parallel_processing[0]))) - else: - self.valid_parameters = False - self.logger.error("When a list or tuple is assigned to the 'parallel_processing' parameter, then it must have 2 elements but ({num_elements}) found.".format(num_elements=len(parallel_processing))) - raise ValueError("When a list or tuple is assigned to the 'parallel_processing' parameter, then it must have 2 elements but ({num_elements}) found.".format(num_elements=len(parallel_processing))) - else: - self.valid_parameters = False - self.logger.error("Unexpected value ({parallel_processing_value}) of type ({parallel_processing_type}) assigned to the 'parallel_processing' parameter. The accepted values for this parameter are:\n1) None: (Default) It means no parallel processing is used.\n2) A positive integer referring to the number of threads to be used (i.e. threads, not processes, are used.\n3) list/tuple: If a list or a tuple of exactly 2 elements is assigned, then:\n\t*1) The first element can be either 'process' or 'thread' to specify whether processes or threads are used, respectively.\n\t*2) The second element can be:\n\t\t**1) A positive integer to select the maximum number of processes or threads to be used.\n\t\t**2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'.\n\t\t**3) None to use the default value as calculated by the concurrent.futures module.".format(parallel_processing_value=parallel_processing, parallel_processing_type=type(parallel_processing))) - raise ValueError("Unexpected value ({parallel_processing_value}) of type ({parallel_processing_type}) assigned to the 'parallel_processing' parameter. 
The accepted values for this parameter are:\n1) None: (Default) It means no parallel processing is used.\n2) A positive integer referring to the number of threads to be used (i.e. threads, not processes, are used.\n3) list/tuple: If a list or a tuple of exactly 2 elements is assigned, then:\n\t*1) The first element can be either 'process' or 'thread' to specify whether processes or threads are used, respectively.\n\t*2) The second element can be:\n\t\t**1) A positive integer to select the maximum number of processes or threads to be used.\n\t\t**2) 0 to indicate that parallel processing is not used. This is identical to setting 'parallel_processing=None'.\n\t\t**3) None to use the default value as calculated by the concurrent.futures module.".format(parallel_processing_value=parallel_processing, parallel_processing_type=type(parallel_processing))) - - # Set the `run_completed` property to False. It is set to `True` only after the `run()` method is complete. - self.run_completed = False - - # The number of completed generations. - self.generations_completed = 0 - - # At this point, all necessary parameters validation is done successfully and we are sure that the parameters are valid. - self.valid_parameters = True # Set to True when all the parameters passed in the GA class constructor are valid. - - # Parameters of the genetic algorithm. - self.num_generations = abs(num_generations) - self.parent_selection_type = parent_selection_type - - # Parameters of the mutation operation. - self.mutation_percent_genes = mutation_percent_genes - self.mutation_num_genes = mutation_num_genes - - # Even such this parameter is declared in the class header, it is assigned to the object here to access it after saving the object. - self.best_solutions_fitness = [] # A list holding the fitness value of the best solution for each generation. - - self.best_solution_generation = -1 # The generation number at which the best fitness value is reached. 
It is only assigned the generation number after the `run()` method completes. Otherwise, its value is -1. - - self.save_best_solutions = save_best_solutions - self.best_solutions = [] # Holds the best solution in each generation. - - self.save_solutions = save_solutions - self.solutions = [] # Holds the solutions in each generation. - self.solutions_fitness = [] # Holds the fitness of the solutions in each generation. - - self.last_generation_fitness = None # A list holding the fitness values of all solutions in the last generation. - self.last_generation_parents = None # A list holding the parents of the last generation. - self.last_generation_offspring_crossover = None # A list holding the offspring after applying crossover in the last generation. - self.last_generation_offspring_mutation = None # A list holding the offspring after applying mutation in the last generation. - self.previous_generation_fitness = None # Holds the fitness values of one generation before the fitness values saved in the last_generation_fitness attribute. Added in PyGAD 2.16.2. - self.last_generation_elitism = None # Added in PyGAD 2.18.0. A NumPy array holding the elitism of the current generation according to the value passed in the 'keep_elitism' parameter. It works only if the 'keep_elitism' parameter has a non-zero value. - self.last_generation_elitism_indices = None # Added in PyGAD 2.19.0. A NumPy array holding the indices of the elitism of the current generation. It works only if the 'keep_elitism' parameter has a non-zero value. 
- - def round_genes(self, solutions): - for gene_idx in range(self.num_genes): - if self.gene_type_single: - if not self.gene_type[1] is None: - solutions[:, gene_idx] = numpy.round(solutions[:, gene_idx], self.gene_type[1]) - else: - if not self.gene_type[gene_idx][1] is None: - solutions[:, gene_idx] = numpy.round(numpy.asarray(solutions[:, gene_idx], - dtype=self.gene_type[gene_idx][0]), - self.gene_type[gene_idx][1]) - return solutions - - def initialize_population(self, - low, - high, - allow_duplicate_genes, - mutation_by_replacement, - gene_type): - - """ - Creates an initial population randomly as a NumPy array. The array is saved in the instance attribute named 'population'. - - low: The lower value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20 and higher. - high: The upper value of the random range from which the gene values in the initial population are selected. It defaults to -4. Available in PyGAD 1.0.20. - - This method assigns the values of the following 3 instance attributes: - 1. pop_size: Size of the population. - 2. population: Initially, holds the initial population and later updated after each generation. - 3. init_population: Keeping the initial population. - """ - - # Population size = (number of chromosomes, number of genes per chromosome) - self.pop_size = (self.sol_per_pop,self.num_genes) # The population will have sol_per_pop chromosome where each chromosome has num_genes genes. - - if self.gene_space is None: - # Creating the initial population randomly. - if self.gene_type_single == True: - self.population = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=self.pop_size), - dtype=self.gene_type[0]) # A NumPy array holding the initial population. - else: - # Create an empty population of dtype=object to support storing mixed data types within the same array. 
- self.population = numpy.zeros(shape=self.pop_size, dtype=object) - # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. - for gene_idx in range(self.num_genes): - # A vector of all values of this single gene across all solutions in the population. - gene_values = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=self.pop_size[0]), - dtype=self.gene_type[gene_idx][0]) - # Adding the current gene values to the population. - self.population[:, gene_idx] = gene_values - - if allow_duplicate_genes == False: - for solution_idx in range(self.population.shape[0]): - # self.logger.info("Before", self.population[solution_idx]) - self.population[solution_idx], _, _ = self.solve_duplicate_genes_randomly(solution=self.population[solution_idx], - min_val=low, - max_val=high, - mutation_by_replacement=True, - gene_type=gene_type, - num_trials=10) - # self.logger.info("After", self.population[solution_idx]) - - elif self.gene_space_nested: - if self.gene_type_single == True: - self.population = numpy.zeros(shape=self.pop_size, dtype=self.gene_type[0]) - for sol_idx in range(self.sol_per_pop): - for gene_idx in range(self.num_genes): - if type(self.gene_space[gene_idx]) in [list, tuple, range]: - # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. 
- if type(self.gene_space[gene_idx]) is range: - temp = self.gene_space[gene_idx] - else: - temp = self.gene_space[gene_idx].copy() - for idx, val in enumerate(self.gene_space[gene_idx]): - if val is None: - self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[0])[0] - self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) - self.population[sol_idx, gene_idx] = self.gene_type[0](self.population[sol_idx, gene_idx]) - self.gene_space[gene_idx] = temp - elif type(self.gene_space[gene_idx]) is dict: - if 'step' in self.gene_space[gene_idx].keys(): - self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], - stop=self.gene_space[gene_idx]['high'], - step=self.gene_space[gene_idx]['step']), - size=1), - dtype=self.gene_type[0])[0] - else: - self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=self.gene_space[gene_idx]['low'], - high=self.gene_space[gene_idx]['high'], - size=1), - dtype=self.gene_type[0])[0] - elif type(self.gene_space[gene_idx]) == type(None): - - # The following commented code replace the None value with a single number that will not change again. - # This means the gene value will be the same across all solutions. - # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - # high=high, - # size=1), dtype=self.gene_type[0])[0] - # self.population[sol_idx, gene_idx] = self.gene_space[gene_idx].copy() - - # The above problem is solved by keeping the None value in the gene_space parameter. This forces PyGAD to generate this value for each solution. 
- self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[0])[0] - elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: - self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] - else: - self.population = numpy.zeros(shape=self.pop_size, dtype=object) - for sol_idx in range(self.sol_per_pop): - for gene_idx in range(self.num_genes): - if type(self.gene_space[gene_idx]) in [list, tuple, range]: - # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. - temp = self.gene_space[gene_idx].copy() - for idx, val in enumerate(self.gene_space[gene_idx]): - if val is None: - self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[gene_idx][0])[0] - self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) - self.population[sol_idx, gene_idx] = self.gene_type[gene_idx][0](self.population[sol_idx, gene_idx]) - self.gene_space[gene_idx] = temp.copy() - elif type(self.gene_space[gene_idx]) is dict: - if 'step' in self.gene_space[gene_idx].keys(): - self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], - stop=self.gene_space[gene_idx]['high'], - step=self.gene_space[gene_idx]['step']), - size=1), - dtype=self.gene_type[gene_idx][0])[0] - else: - self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=self.gene_space[gene_idx]['low'], - high=self.gene_space[gene_idx]['high'], - size=1), - dtype=self.gene_type[gene_idx][0])[0] - elif type(self.gene_space[gene_idx]) == type(None): - # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - # high=high, - # size=1), - # dtype=self.gene_type[gene_idx][0])[0] - - # self.population[sol_idx, gene_idx] = 
self.gene_space[gene_idx].copy() - - temp = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[gene_idx][0])[0] - self.population[sol_idx, gene_idx] = temp - elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: - self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] - else: - if self.gene_type_single == True: - # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. - for idx, curr_gene_space in enumerate(self.gene_space): - if curr_gene_space is None: - self.gene_space[idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[0])[0] - - # Creating the initial population by randomly selecting the genes' values from the values inside the 'gene_space' parameter. - if type(self.gene_space) is dict: - if 'step' in self.gene_space.keys(): - self.population = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space['low'], - stop=self.gene_space['high'], - step=self.gene_space['step']), - size=self.pop_size), - dtype=self.gene_type[0]) - else: - self.population = numpy.asarray(numpy.random.uniform(low=self.gene_space['low'], - high=self.gene_space['high'], - size=self.pop_size), - dtype=self.gene_type[0]) # A NumPy array holding the initial population. - else: - self.population = numpy.asarray(numpy.random.choice(self.gene_space, - size=self.pop_size), - dtype=self.gene_type[0]) # A NumPy array holding the initial population. - else: - # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. 
- for gene_idx, curr_gene_space in enumerate(self.gene_space): - if curr_gene_space is None: - self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[gene_idx][0])[0] - - # Creating the initial population by randomly selecting the genes' values from the values inside the 'gene_space' parameter. - if type(self.gene_space) is dict: - # Create an empty population of dtype=object to support storing mixed data types within the same array. - self.population = numpy.zeros(shape=self.pop_size, dtype=object) - # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. - for gene_idx in range(self.num_genes): - # A vector of all values of this single gene across all solutions in the population. - if 'step' in self.gene_space[gene_idx].keys(): - gene_values = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], - stop=self.gene_space[gene_idx]['high'], - step=self.gene_space[gene_idx]['step']), - size=self.pop_size[0]), - dtype=self.gene_type[gene_idx][0]) - else: - gene_values = numpy.asarray(numpy.random.uniform(low=self.gene_space['low'], - high=self.gene_space['high'], - size=self.pop_size[0]), - dtype=self.gene_type[gene_idx][0]) - # Adding the current gene values to the population. - self.population[:, gene_idx] = gene_values - - else: - # Create an empty population of dtype=object to support storing mixed data types within the same array. - self.population = numpy.zeros(shape=self.pop_size, dtype=object) - # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. - for gene_idx in range(self.num_genes): - # A vector of all values of this single gene across all solutions in the population. 
def cal_pop_fitness(self):

    """
    Calculating the fitness values of batches of solutions in the current population.

    The fitness of a solution is reused instead of calling the fitness function when the solution is found, in this order, in:
        1) 'self.solutions' (only if 'self.save_solutions' is True).
        2) 'self.last_generation_elitism' (only if 'self.keep_elitism' > 0).
        3) 'self.last_generation_parents' (only if 'self.keep_parents' is -1 or > 0).
    The remaining solutions are passed to 'self.fitness_func' either one by one or in batches of 'self.fitness_batch_size' solutions.
    If 'self.parallel_processing' is not None, the calls are dispatched through a process/thread pool.

    It returns:
        -fitness: An array of the calculated fitness values.

    It raises:
        -Exception: If 'self.valid_parameters' is False.
        -TypeError: If a batch call does not return a list, tuple, or numpy.ndarray.
        -ValueError: If a batch call returns a wrong number of values or if a fitness value is not a supported number.
    """

    if self.valid_parameters == False:
        message = "ERROR calling the cal_pop_fitness() method: \nPlease check the parameters passed while creating an instance of the GA class.\n"
        self.logger.error(message)
        raise Exception(message)

    # The message used by the parallel batch path wraps the value in parentheses. Both variants are kept as they are.
    number_message = "The fitness function should return a number but the value {fit_val} of type {fit_type} found."
    number_message_parallel_batch = "The fitness function should return a number but the value ({fit_val}) of type {fit_type} found."

    # List versions of the parents and elitism arrays.
    # The 'in' membership operator of Python lists is used to match the solutions. This is much faster than using 'numpy.where()'.
    parents_as_list = None
    if self.last_generation_parents is not None:
        parents_as_list = [list(gen_parent) for gen_parent in self.last_generation_parents]

    elitism_as_list = None
    if self.last_generation_elitism is not None:
        elitism_as_list = [list(gen_elitism) for gen_elitism in self.last_generation_elitism]

    # Private marker for the solutions whose fitness is not known yet.
    # It is compared by identity so that it can never be confused with a real fitness value.
    not_calculated = object()

    def cached_fitness(sol):
        # Return the already-known fitness of the solution or the 'not_calculated' marker.
        sol_as_list = list(sol)
        if (self.save_solutions) and (len(self.solutions) > 0) and (sol_as_list in self.solutions):
            return self.solutions_fitness[self.solutions.index(sol_as_list)]
        if (self.keep_elitism > 0) and (self.last_generation_elitism is not None) and (len(self.last_generation_elitism) > 0) and (sol_as_list in elitism_as_list):
            # The index inside the elitism array is mapped to the index inside the previous population.
            elitism_idx = self.last_generation_elitism_indices[elitism_as_list.index(sol_as_list)]
            return self.previous_generation_fitness[elitism_idx]
        if ((self.keep_parents == -1) or (self.keep_parents > 0)) and (self.last_generation_parents is not None) and (len(self.last_generation_parents) > 0) and (sol_as_list in parents_as_list):
            # The index inside the parents array is mapped to the index inside the previous population.
            parent_idx = self.last_generation_parents_indices[parents_as_list.index(sol_as_list)]
            return self.previous_generation_fitness[parent_idx]
        return not_calculated

    def check_number(fitness, template=number_message):
        # Raise ValueError if the fitness value is not one of the supported numeric types.
        if type(fitness) not in GA.supported_int_float_types:
            message = template.format(fit_val=fitness, fit_type=type(fitness))
            self.logger.error(message)
            raise ValueError(message)

    def store_batch(batch_indices, batch_fitness, template=number_message):
        # Validate the output of one batch call then save its values in 'pop_fitness'.
        if type(batch_fitness) not in [list, tuple, numpy.ndarray]:
            message = "Expected to receive a list, tuple, or numpy.ndarray from the fitness function but the value ({batch_fitness}) of type {batch_fitness_type}.".format(batch_fitness=batch_fitness, batch_fitness_type=type(batch_fitness))
            self.logger.error(message)
            raise TypeError(message)
        if len(numpy.array(batch_fitness)) != len(batch_indices):
            message = "There is a mismatch between the number of solutions passed to the fitness function ({batch_indices_len}) and the number of fitness values returned ({batch_fitness_len}). They must match.".format(batch_fitness_len=len(batch_fitness), batch_indices_len=len(batch_indices))
            self.logger.error(message)
            raise ValueError(message)
        for index, fitness in zip(batch_indices, batch_fitness):
            check_number(fitness, template)
            pop_fitness[index] = fitness

    def split_into_batches(indices):
        # Split the indices into consecutive chunks of at most 'self.fitness_batch_size' elements.
        num_batches = int(numpy.ceil(len(indices) / self.fitness_batch_size))
        return [indices[batch_idx * self.fitness_batch_size:(batch_idx + 1) * self.fitness_batch_size]
                for batch_idx in range(num_batches)]

    batch_mode = self.fitness_batch_size not in [1, None]
    pop_fitness = [not_calculated] * len(self.population)

    if self.parallel_processing is None:
        # Calculating the fitness value of each solution in the current population.
        for sol_idx, sol in enumerate(self.population):
            fitness = cached_fitness(sol)
            if fitness is not_calculated and not batch_mode:
                fitness = self.fitness_func(self, sol, sol_idx)
                check_number(fitness)
            # In batch mode, the marker is kept here and replaced later by the batch calls.
            pop_fitness[sol_idx] = fitness

        if batch_mode:
            # Indices of the solutions to calculate their fitness. A NumPy array of indices is passed to the fitness function.
            pending_mask = numpy.array([fitness is not_calculated for fitness in pop_fitness], dtype=bool)
            for batch_indices in split_into_batches(numpy.flatnonzero(pending_mask)):
                batch_fitness = self.fitness_func(self, self.population[batch_indices, :], batch_indices)
                store_batch(batch_indices, batch_fitness)
    else:
        # Reuse the already-known fitness values before submitting any work to the pool.
        for sol_idx, sol in enumerate(self.population):
            pop_fitness[sol_idx] = cached_fitness(sol)

        # Decide which class to use based on whether the user selected "process" or "thread"
        if self.parallel_processing[0] == "process":
            ExecutorClass = concurrent.futures.ProcessPoolExecutor
        else:
            ExecutorClass = concurrent.futures.ThreadPoolExecutor

        # The with statement ensures that the workers are cleaned up promptly.
        with ExecutorClass(max_workers=self.parallel_processing[1]) as executor:
            # Only the solutions that still hold the marker must be calculated.
            solutions_to_submit_indices = [sol_idx for sol_idx, fitness in enumerate(pop_fitness) if fitness is not_calculated]

            if not batch_mode:
                solutions_to_submit = [self.population[sol_idx].copy() for sol_idx in solutions_to_submit_indices]
                results = executor.map(self.fitness_func,
                                       [self] * len(solutions_to_submit_indices),
                                       solutions_to_submit,
                                       solutions_to_submit_indices)
                for index, fitness in zip(solutions_to_submit_indices, results):
                    check_number(fitness)
                    pop_fitness[index] = fitness
            else:
                # Each element of the 'batches_indices' list holds the solutions' indices in one batch (as a Python list).
                batches_indices = split_into_batches(solutions_to_submit_indices)
                # Each element of the 'batches_solutions' list holds the solutions in one batch.
                batches_solutions = [self.population[batch_indices, :] for batch_indices in batches_indices]
                # One task is submitted per batch.
                results = executor.map(self.fitness_func,
                                       [self] * len(batches_indices),
                                       batches_solutions,
                                       batches_indices)
                for batch_indices, batch_fitness in zip(batches_indices, results):
                    store_batch(batch_indices, batch_fitness, number_message_parallel_batch)

    pop_fitness = numpy.array(pop_fitness)

    return pop_fitness
def run(self):

    """
    Runs the genetic algorithm. This is the main method in which the genetic algorithm is evolved through a number of generations.

    For each generation: the parents are selected, crossover and mutation are applied (if enabled), the population is updated, and the fitness is calculated again.
    The callbacks on_start, on_fitness, on_parents, on_crossover, on_mutation, on_generation, and on_stop are called if they are not None.
    The loop stops early if on_generation() returns the string "stop" or if one of the criteria in 'self.stop_criteria' is met.

    It raises:
        -Exception: If 'self.valid_parameters' is False.
        -TypeError: If a user-defined parent selection/crossover/mutation function does not return numpy.ndarray.
        -ValueError: If the output of the parent selection/crossover/mutation step has an unexpected size.
    """

    def fail(error_type, message):
        # Log the message then raise it so that the log and the exception always hold the same text.
        self.logger.error(message)
        raise error_type(message)

    if self.valid_parameters == False:
        fail(Exception, "Error calling the run() method: \nThe run() method cannot be executed with invalid parameters. Please check the parameters passed while creating an instance of the GA class.\n")

    # Starting from PyGAD 2.18.0, the 4 properties (best_solutions, best_solutions_fitness, solutions, and solutions_fitness) are no longer reset with each call to the run() method. Instead, they are extended.
    # For example, if there are 50 generations and the user set save_best_solutions=True, then the length of the 2 properties best_solutions and best_solutions_fitness will be 50 after the first call to the run() method, then 100 after the second call, 150 after the third, and so on.
    # A previous call to run() may have converted some of them to NumPy arrays. They are converted back to lists so that they can be extended.
    if type(self.best_solutions) is numpy.ndarray:
        self.best_solutions = list(self.best_solutions)
    if type(self.best_solutions_fitness) is numpy.ndarray:
        self.best_solutions_fitness = list(self.best_solutions_fitness)
    if type(self.solutions) is numpy.ndarray:
        self.solutions = list(self.solutions)
    if type(self.solutions_fitness) is numpy.ndarray:
        self.solutions_fitness = list(self.solutions_fitness)

    if self.on_start is not None:
        self.on_start(self)

    stop_run = False

    # To continue from where we stopped, the first generation index should start from the value of the 'self.generations_completed' parameter.
    if self.generations_completed != 0 and type(self.generations_completed) in GA.supported_int_types:
        generation_first_idx = self.generations_completed
        generation_last_idx = self.num_generations + self.generations_completed
    else:
        # If the 'self.generations_completed' parameter is '0', then start from scratch.
        generation_first_idx = 0
        generation_last_idx = self.num_generations

    # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute.
    self.last_generation_fitness = self.cal_pop_fitness()

    best_solution, best_solution_fitness, _ = self.best_solution(pop_fitness=self.last_generation_fitness)

    # Appending the best solution in the initial population to the best_solutions list.
    if self.save_best_solutions:
        self.best_solutions.append(best_solution)

    for generation in range(generation_first_idx, generation_last_idx):
        if self.on_fitness is not None:
            self.on_fitness(self, self.last_generation_fitness)

        # Appending the fitness value of the best solution in the current generation to the best_solutions_fitness attribute.
        self.best_solutions_fitness.append(best_solution_fitness)

        # Appending the solutions in the current generation to the solutions list.
        if self.save_solutions:
            # The solutions are saved as Python lists so that cal_pop_fitness() can match them using the 'in' operator.
            self.solutions.extend([list(item) for item in self.population])
            self.solutions_fitness.extend(self.last_generation_fitness)

        # Selecting the best parents in the population for mating.
        if callable(self.parent_selection_type):
            # NOTE(review): the instance is passed both as the first and the last argument, exactly as in the original code. Confirm against how 'self.select_parents' is assigned for user-defined functions.
            self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self,
                                                                                                      self.last_generation_fitness,
                                                                                                      self.num_parents_mating, self)
            if type(self.last_generation_parents) is not numpy.ndarray:
                fail(TypeError, "The type of the iterable holding the selected parents is expected to be (numpy.ndarray) but {last_generation_parents_type} found.".format(last_generation_parents_type=type(self.last_generation_parents)))
            if type(self.last_generation_parents_indices) is not numpy.ndarray:
                fail(TypeError, "The type of the iterable holding the selected parents' indices is expected to be (numpy.ndarray) but {last_generation_parents_indices_type} found.".format(last_generation_parents_indices_type=type(self.last_generation_parents_indices)))
        else:
            self.last_generation_parents, self.last_generation_parents_indices = self.select_parents(self.last_generation_fitness,
                                                                                                      num_parents=self.num_parents_mating)

        # Validate the output of the parent selection step: self.select_parents()
        parents_size_expected = (self.num_parents_mating, self.num_genes)
        parents_size_actual = self.last_generation_parents.shape
        if parents_size_actual != parents_size_expected:
            if parents_size_actual[0] != self.num_parents_mating:
                fail(ValueError, "Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. It is expected to select ({num_parents_mating}) parents but ({num_parents_mating_selected}) selected.".format(parents_size_actual=parents_size_actual, parents_size_expected=parents_size_expected, num_parents_mating=self.num_parents_mating, num_parents_mating_selected=parents_size_actual[0]))
            elif parents_size_actual[1] != self.num_genes:
                fail(ValueError, "Size mismatch between the size of the selected parents {parents_size_actual} and the expected size {parents_size_expected}. Parents are expected to have ({num_genes}) genes but ({num_genes_selected}) produced.".format(parents_size_actual=parents_size_actual, parents_size_expected=parents_size_expected, num_genes=self.num_genes, num_genes_selected=parents_size_actual[1]))

        if self.last_generation_parents_indices.ndim != 1:
            # Report the number of dimensions found (not the length of the first axis).
            fail(ValueError, "The iterable holding the selected parents indices is expected to have 1 dimension but ({parents_indices_ndim}) found.".format(parents_indices_ndim=self.last_generation_parents_indices.ndim))
        elif len(self.last_generation_parents_indices) != self.num_parents_mating:
            fail(ValueError, "The iterable holding the selected parents indices is expected to have ({num_parents_mating}) values but ({num_parents_mating_selected}) found.".format(num_parents_mating=self.num_parents_mating, num_parents_mating_selected=len(self.last_generation_parents_indices)))

        if self.on_parents is not None:
            self.on_parents(self, self.last_generation_parents)

        offspring_size_expected = (self.num_offspring, self.num_genes)

        # If self.crossover_type=None, then no crossover is applied and thus no offspring will be created in the next generations. The next generation will use the solutions in the current population.
        if self.crossover_type is None:
            if self.keep_elitism == 0:
                num_parents_to_keep = self.num_parents_mating if self.keep_parents == -1 else self.keep_parents
                if self.num_offspring <= num_parents_to_keep:
                    self.last_generation_offspring_crossover = self.last_generation_parents[0:self.num_offspring]
                else:
                    self.last_generation_offspring_crossover = numpy.concatenate((self.last_generation_parents, self.population[0:(self.num_offspring - self.last_generation_parents.shape[0])]))
            else:
                # The steady_state_selection() function is called to select the best solutions (i.e. elitism). The keep_elitism parameter defines the number of these solutions.
                # The steady_state_selection() function is still called here even if its output may not be used given that the condition of the next if statement is True. The reason is that it will be used later.
                self.last_generation_elitism, _ = self.steady_state_selection(self.last_generation_fitness,
                                                                              num_parents=self.keep_elitism)
                if self.num_offspring <= self.keep_elitism:
                    self.last_generation_offspring_crossover = self.last_generation_parents[0:self.num_offspring]
                else:
                    self.last_generation_offspring_crossover = numpy.concatenate((self.last_generation_elitism, self.population[0:(self.num_offspring - self.last_generation_elitism.shape[0])]))
        else:
            # Generating offspring using crossover.
            if callable(self.crossover_type):
                self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents,
                                                                          offspring_size_expected,
                                                                          self)
                if type(self.last_generation_offspring_crossover) is not numpy.ndarray:
                    fail(TypeError, "The output of the crossover step is expected to be of type (numpy.ndarray) but {last_generation_offspring_crossover_type} found.".format(last_generation_offspring_crossover_type=type(self.last_generation_offspring_crossover)))
            else:
                self.last_generation_offspring_crossover = self.crossover(self.last_generation_parents,
                                                                          offspring_size=offspring_size_expected)

            # Validate the size of the crossover output.
            crossover_actual_size = self.last_generation_offspring_crossover.shape
            if crossover_actual_size != offspring_size_expected:
                if crossover_actual_size[0] != self.num_offspring:
                    fail(ValueError, "Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(crossover_actual_size=crossover_actual_size, crossover_expected_size=offspring_size_expected, num_offspring=self.num_offspring, num_offspring_produced=crossover_actual_size[0]))
                elif crossover_actual_size[1] != self.num_genes:
                    fail(ValueError, "Size mismatch between the crossover output {crossover_actual_size} and the expected crossover output {crossover_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(crossover_actual_size=crossover_actual_size, crossover_expected_size=offspring_size_expected, num_genes=self.num_genes, num_genes_produced=crossover_actual_size[1]))

        # PyGAD 2.18.2 // The on_crossover() callback function is called even if crossover_type is None.
        if self.on_crossover is not None:
            self.on_crossover(self, self.last_generation_offspring_crossover)

        # If self.mutation_type=None, then no mutation is applied and thus no changes are applied to the offspring created using the crossover operation. The offspring will be used unchanged in the next generation.
        if self.mutation_type is None:
            self.last_generation_offspring_mutation = self.last_generation_offspring_crossover
        else:
            # Adding some variations to the offspring using mutation.
            if callable(self.mutation_type):
                self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover,
                                                                        self)
                if type(self.last_generation_offspring_mutation) is not numpy.ndarray:
                    fail(TypeError, "The output of the mutation step is expected to be of type (numpy.ndarray) but {last_generation_offspring_mutation_type} found.".format(last_generation_offspring_mutation_type=type(self.last_generation_offspring_mutation)))
            else:
                self.last_generation_offspring_mutation = self.mutation(self.last_generation_offspring_crossover)

            # Validate the size of the mutation output.
            mutation_actual_size = self.last_generation_offspring_mutation.shape
            if mutation_actual_size != offspring_size_expected:
                if mutation_actual_size[0] != self.num_offspring:
                    fail(ValueError, "Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. It is expected to produce ({num_offspring}) offspring but ({num_offspring_produced}) produced.".format(mutation_actual_size=mutation_actual_size, mutation_expected_size=offspring_size_expected, num_offspring=self.num_offspring, num_offspring_produced=mutation_actual_size[0]))
                elif mutation_actual_size[1] != self.num_genes:
                    fail(ValueError, "Size mismatch between the mutation output {mutation_actual_size} and the expected mutation output {mutation_expected_size}. It is expected that the offspring has ({num_genes}) genes but ({num_genes_produced}) produced.".format(mutation_actual_size=mutation_actual_size, mutation_expected_size=offspring_size_expected, num_genes=self.num_genes, num_genes_produced=mutation_actual_size[1]))

        # PyGAD 2.18.2 // The on_mutation() callback function is called even if mutation_type is None.
        if self.on_mutation is not None:
            self.on_mutation(self, self.last_generation_offspring_mutation)

        # Update the population attribute according to the offspring generated.
        if self.keep_elitism == 0:
            # If the keep_elitism parameter is 0, then the keep_parents parameter will be used to decide if the parents are kept in the next generation.
            if self.keep_parents == 0:
                self.population = self.last_generation_offspring_mutation
            elif self.keep_parents == -1:
                # Creating the new population based on the parents and offspring.
                num_kept = self.last_generation_parents.shape[0]
                self.population[0:num_kept, :] = self.last_generation_parents
                self.population[num_kept:, :] = self.last_generation_offspring_mutation
            elif self.keep_parents > 0:
                parents_to_keep, _ = self.steady_state_selection(self.last_generation_fitness,
                                                                 num_parents=self.keep_parents)
                num_kept = parents_to_keep.shape[0]
                self.population[0:num_kept, :] = parents_to_keep
                self.population[num_kept:, :] = self.last_generation_offspring_mutation
        else:
            self.last_generation_elitism, self.last_generation_elitism_indices = self.steady_state_selection(self.last_generation_fitness,
                                                                                                              num_parents=self.keep_elitism)
            num_kept = self.last_generation_elitism.shape[0]
            self.population[0:num_kept, :] = self.last_generation_elitism
            self.population[num_kept:, :] = self.last_generation_offspring_mutation

        self.generations_completed = generation + 1  # The generations_completed attribute holds the number of the last completed generation.

        # Keep the fitness of the previous population because cal_pop_fitness() reuses it for the kept parents/elitism.
        self.previous_generation_fitness = self.last_generation_fitness.copy()
        # Measuring the fitness of each chromosome in the population. Save the fitness in the last_generation_fitness attribute.
        self.last_generation_fitness = self.cal_pop_fitness()

        best_solution, best_solution_fitness, _ = self.best_solution(pop_fitness=self.last_generation_fitness)

        # Appending the best solution in the current generation to the best_solutions list.
        if self.save_best_solutions:
            self.best_solutions.append(best_solution)

        # If the on_generation attribute is not None, then call the callback function after the generation.
        if self.on_generation is not None:
            r = self.on_generation(self)
            if type(r) is str and r.lower() == "stop":
                # Before aborting the loop, save the fitness value of the best solution.
                self.best_solutions_fitness.append(best_solution_fitness)
                break

        if self.stop_criteria is not None:
            for criterion in self.stop_criteria:
                if criterion[0] == "reach":
                    if max(self.last_generation_fitness) >= criterion[1]:
                        stop_run = True
                        break
                elif criterion[0] == "saturate":
                    # The number of generations is converted (in place) to an integer before it is used as an index offset.
                    criterion[1] = int(criterion[1])
                    if self.generations_completed >= criterion[1]:
                        if (self.best_solutions_fitness[self.generations_completed - criterion[1]] - self.best_solutions_fitness[self.generations_completed - 1]) == 0:
                            stop_run = True
                            break

            if stop_run:
                break

        time.sleep(self.delay_after_gen)

    # Save the solutions and the fitness of the last generation.
    if self.save_solutions:
        self.solutions.extend([list(item) for item in self.population])
        self.solutions_fitness.extend(self.last_generation_fitness)

    # Save the fitness value of the best solution.
    _, best_solution_fitness, _ = self.best_solution(pop_fitness=self.last_generation_fitness)
    self.best_solutions_fitness.append(best_solution_fitness)

    # Index of the first generation that reached the highest best-solution fitness.
    best_fitness_per_generation = numpy.array(self.best_solutions_fitness)
    self.best_solution_generation = numpy.where(best_fitness_per_generation == numpy.max(best_fitness_per_generation))[0][0]
    # After the run() method completes, the run_completed flag is changed from False to True.
    self.run_completed = True  # Set to True only after the run() method completes gracefully.

    if self.on_stop is not None:
        self.on_stop(self, self.last_generation_fitness)

    # Converting the 'best_solutions' list into a NumPy array.
    self.best_solutions = numpy.array(self.best_solutions)
- self.best_solutions = numpy.array(self.best_solutions) - - # Converting the 'solutions' list into a NumPy array. - # self.solutions = numpy.array(self.solutions) - - def best_solution(self, pop_fitness=None): - - """ - Returns information about the best solution found by the genetic algorithm. - Accepts the following parameters: - pop_fitness: An optional parameter holding the fitness values of the solutions in the latest population. If passed, then it save time calculating the fitness. If None, then the 'cal_pop_fitness()' method is called to calculate the fitness of the latest population. - The following are returned: - -best_solution: Best solution in the current population. - -best_solution_fitness: Fitness value of the best solution. - -best_match_idx: Index of the best solution in the current population. - """ - - if pop_fitness is None: - # If the 'pop_fitness' parameter is not passed, then we have to call the 'cal_pop_fitness()' method to calculate the fitness of all solutions in the lastest population. - pop_fitness = self.cal_pop_fitness() - # Verify the type of the 'pop_fitness' parameter. - elif type(pop_fitness) in [tuple, list, numpy.ndarray]: - # Verify that the length of the passed population fitness matches the length of the 'self.population' attribute. - if len(pop_fitness) == len(self.population): - # This successfully verifies the 'pop_fitness' parameter. 
- pass - else: - self.logger.error("The length of the list/tuple/numpy.ndarray passed to the 'pop_fitness' parameter ({pop_fitness_length}) must match the length of the 'self.population' attribute ({population_length}).".format(pop_fitness_length=len(pop_fitness), population_length=len(self.population))) - raise ValueError("The length of the list/tuple/numpy.ndarray passed to the 'pop_fitness' parameter ({pop_fitness_length}) must match the length of the 'self.population' attribute ({population_length}).".format(pop_fitness_length=len(pop_fitness), population_length=len(self.population))) - else: - self.logger.error("The type of the 'pop_fitness' parameter is expected to be list, tuple, or numpy.ndarray but ({pop_fitness_type}) found.".format(pop_fitness_type=type(pop_fitness))) - raise ValueError("The type of the 'pop_fitness' parameter is expected to be list, tuple, or numpy.ndarray but ({pop_fitness_type}) found.".format(pop_fitness_type=type(pop_fitness))) - - # Return the index of the best solution that has the best fitness value. - best_match_idx = numpy.where(pop_fitness == numpy.max(pop_fitness))[0][0] - - best_solution = self.population[best_match_idx, :].copy() - best_solution_fitness = pop_fitness[best_match_idx] - - return best_solution, best_solution_fitness, best_match_idx - - def save(self, filename): - - """ - Saves the genetic algorithm instance: - -filename: Name of the file to save the instance. No extension is needed. - """ - - cloudpickle_serialized_object = cloudpickle.dumps(self) - with open(filename + ".pkl", 'wb') as file: - file.write(cloudpickle_serialized_object) - cloudpickle.dump(self, file) - - def summary(self, - line_length=70, - fill_character=" ", - line_character="-", - line_character2="=", - columns_equal_len=False, - print_step_parameters=True, - print_parameters_summary=True): - """ - The summary() method prints a summary of the PyGAD lifecycle in a Keras style. 
- The parameters are: - line_length: An integer representing the length of the single line in characters. - fill_character: A character to fill the lines. - line_character: A character for creating a line separator. - line_character2: A secondary character to create a line separator. - columns_equal_len: The table rows are split into equal-sized columns or split subjective to the width needed. - print_step_parameters: Whether to print extra parameters about each step inside the step. If print_step_parameters=False and print_parameters_summary=True, then the parameters of each step are printed at the end of the table. - print_parameters_summary: Whether to print parameters summary at the end of the table. If print_step_parameters=False, then the parameters of each step are printed at the end of the table too. - """ - - summary_output = "" - - def fill_message(msg, line_length=line_length, fill_character=fill_character): - num_spaces = int((line_length - len(msg))/2) - num_spaces = int(num_spaces / len(fill_character)) - msg = "{spaces}{msg}{spaces}".format(msg=msg, spaces=fill_character * num_spaces) - return msg - - def line_separator(line_length=line_length, line_character=line_character): - num_characters = int(line_length / len(line_character)) - return line_character * num_characters - - def create_row(columns, line_length=line_length, fill_character=fill_character, split_percentages=None): - filled_columns = [] - if split_percentages == None: - split_percentages = [int(100/len(columns))] * 3 - columns_lengths = [int((split_percentages[idx] * line_length) / 100) for idx in range(len(split_percentages))] - for column_idx, column in enumerate(columns): - current_column_length = len(column) - extra_characters = columns_lengths[column_idx] - current_column_length - filled_column = column + fill_character * extra_characters - filled_column = column + fill_character * extra_characters - filled_columns.append(filled_column) - - return "".join(filled_columns) - - def 
print_parent_selection_params(): - nonlocal summary_output - m = "Number of Parents: {num_parents_mating}".format(num_parents_mating=self.num_parents_mating) - self.logger.info(m) - summary_output = summary_output + m + "\n" - if self.parent_selection_type == "tournament": - m = "K Tournament: {K_tournament}".format(K_tournament=self.K_tournament) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - def print_fitness_params(): - nonlocal summary_output - if not self.fitness_batch_size is None: - m = "Fitness batch size: {fitness_batch_size}".format(fitness_batch_size=self.fitness_batch_size) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - def print_crossover_params(): - nonlocal summary_output - if not self.crossover_probability is None: - m = "Crossover probability: {crossover_probability}".format(crossover_probability=self.crossover_probability) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - def print_mutation_params(): - nonlocal summary_output - if not self.mutation_probability is None: - m = "Mutation Probability: {mutation_probability}".format(mutation_probability=self.mutation_probability) - self.logger.info(m) - summary_output = summary_output + m + "\n" - if self.mutation_percent_genes == "default": - m = "Mutation Percentage: {mutation_percent_genes}".format(mutation_percent_genes=self.mutation_percent_genes) - self.logger.info(m) - summary_output = summary_output + m + "\n" - # Number of mutation genes is already showed above. 
- m = "Mutation Genes: {mutation_num_genes}".format(mutation_num_genes=self.mutation_num_genes) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Random Mutation Range: ({random_mutation_min_val}, {random_mutation_max_val})".format(random_mutation_min_val=self.random_mutation_min_val, random_mutation_max_val=self.random_mutation_max_val) - self.logger.info(m) - summary_output = summary_output + m + "\n" - if not self.gene_space is None: - m = "Gene Space: {gene_space}".format(gene_space=self.gene_space) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Mutation by Replacement: {mutation_by_replacement}".format(mutation_by_replacement=self.mutation_by_replacement) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Allow Duplicated Genes: {allow_duplicate_genes}".format(allow_duplicate_genes=self.allow_duplicate_genes) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - def print_on_generation_params(): - nonlocal summary_output - if not self.stop_criteria is None: - m = "Stop Criteria: {stop_criteria}".format(stop_criteria=self.stop_criteria) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - def print_params_summary(): - nonlocal summary_output - m = "Population Size: ({sol_per_pop}, {num_genes})".format(sol_per_pop=self.sol_per_pop, num_genes=self.num_genes) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Number of Generations: {num_generations}".format(num_generations=self.num_generations) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Initial Population Range: ({init_range_low}, {init_range_high})".format(init_range_low=self.init_range_low, init_range_high=self.init_range_high) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - if not print_step_parameters: - print_fitness_params() - - if not print_step_parameters: - print_parent_selection_params() - - if self.keep_elitism != 0: - m 
= "Keep Elitism: {keep_elitism}".format(keep_elitism=self.keep_elitism) - self.logger.info(m) - summary_output = summary_output + m + "\n" - else: - m = "Keep Parents: {keep_parents}".format(keep_parents=self.keep_parents) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Gene DType: {gene_type}".format(gene_type=self.gene_type) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - if not print_step_parameters: - print_crossover_params() - - if not print_step_parameters: - print_mutation_params() - - if self.delay_after_gen != 0: - m = "Post-Generation Delay: {delay_after_gen}".format(delay_after_gen=self.delay_after_gen) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - if not print_step_parameters: - print_on_generation_params() - - if not self.parallel_processing is None: - m = "Parallel Processing: {parallel_processing}".format(parallel_processing=self.parallel_processing) - self.logger.info(m) - summary_output = summary_output + m + "\n" - if not self.random_seed is None: - m = "Random Seed: {random_seed}".format(random_seed=self.random_seed) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Save Best Solutions: {save_best_solutions}".format(save_best_solutions=self.save_best_solutions) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = "Save Solutions: {save_solutions}".format(save_solutions=self.save_solutions) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - m = line_separator(line_character=line_character) - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = fill_message("PyGAD Lifecycle") - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = line_separator(line_character=line_character2) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - lifecycle_steps = ["on_start()", "Fitness Function", "On Fitness", "Parent Selection", "On Parents", "Crossover", "On Crossover", 
"Mutation", "On Mutation", "On Generation", "On Stop"] - lifecycle_functions = [self.on_start, self.fitness_func, self.on_fitness, self.select_parents, self.on_parents, self.crossover, self.on_crossover, self.mutation, self.on_mutation, self.on_generation, self.on_stop] - lifecycle_functions = [getattr(lifecycle_func, '__name__', "None") for lifecycle_func in lifecycle_functions] - lifecycle_functions = [lifecycle_func + "()" if lifecycle_func != "None" else "None" for lifecycle_func in lifecycle_functions] - lifecycle_output = ["None", "(1)", "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "({num_parents_mating}, {num_genes})".format(num_parents_mating=self.num_parents_mating, num_genes=self.num_genes), "None", "None", "None"] - lifecycle_step_parameters = [None, print_fitness_params, None, print_parent_selection_params, None, print_crossover_params, None, print_mutation_params, None, print_on_generation_params, None] - - if not columns_equal_len: - max_lengthes = [max(list(map(len, lifecycle_steps))), max(list(map(len, lifecycle_functions))), max(list(map(len, lifecycle_output)))] - split_percentages = [int((column_len / sum(max_lengthes)) * 100) for column_len in max_lengthes] - else: - split_percentages = None - - header_columns = ["Step", "Handler", "Output Shape"] - header_row = create_row(header_columns, split_percentages=split_percentages) - m = header_row - self.logger.info(m) - summary_output = summary_output + m + "\n" - m = line_separator(line_character=line_character2) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - for lifecycle_idx in range(len(lifecycle_steps)): - lifecycle_column = [lifecycle_steps[lifecycle_idx], lifecycle_functions[lifecycle_idx], lifecycle_output[lifecycle_idx]] - if lifecycle_column[1] == "None": - 
continue - lifecycle_row = create_row(lifecycle_column, split_percentages=split_percentages) - m = lifecycle_row - self.logger.info(m) - summary_output = summary_output + m + "\n" - if print_step_parameters: - if not lifecycle_step_parameters[lifecycle_idx] is None: - lifecycle_step_parameters[lifecycle_idx]() - m = line_separator(line_character=line_character) - self.logger.info(m) - summary_output = summary_output + m + "\n" - - m = line_separator(line_character=line_character2) - self.logger.info(m) - summary_output = summary_output + m + "\n" - if print_parameters_summary: - print_params_summary() - m = line_separator(line_character=line_character2) - self.logger.info(m) - summary_output = summary_output + m + "\n" - return summary_output - -def load(filename): - - """ - Reads a saved instance of the genetic algorithm: - -filename: Name of the file to read the instance. No extension is needed. - Returns the genetic algorithm instance. - """ - - try: - with open(filename + ".pkl", 'rb') as file: - ga_in = cloudpickle.load(file) - except FileNotFoundError: - raise FileNotFoundError("Error reading the file {filename}. Please check your inputs.".format(filename=filename)) - except: - # raise BaseException("Error loading the file. If the file already exists, please reload all the functions previously used (e.g. fitness function).") - raise BaseException("Error loading the file.") - return ga_in From bf87baad1109117a469bf20f441fa78e9be7b459 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:40:55 -0400 Subject: [PATCH 03/32] Delete __init__.py --- pygad/__init__.py | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 pygad/__init__.py diff --git a/pygad/__init__.py b/pygad/__init__.py deleted file mode 100644 index 71f207b..0000000 --- a/pygad/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .pygad import * # Relative import. 
- -__version__ = "3.0.0" From 8ee348f03a409d6643929c95c3d7ed961b23fad0 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:41:12 -0400 Subject: [PATCH 04/32] Rename __init__.py to pygad/__init__.py --- __init__.py => pygad/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename __init__.py => pygad/__init__.py (100%) diff --git a/__init__.py b/pygad/__init__.py similarity index 100% rename from __init__.py rename to pygad/__init__.py From 78daf35ec68b0392c8e2e742dab7ab75ede6869b Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:41:24 -0400 Subject: [PATCH 05/32] Rename pygad.py to pygad/pygad.py --- pygad.py => pygad/pygad.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pygad.py => pygad/pygad.py (100%) diff --git a/pygad.py b/pygad/pygad.py similarity index 100% rename from pygad.py rename to pygad/pygad.py From 7aedc6de0211756f3acd0598fb6052dd6256d699 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:21 -0400 Subject: [PATCH 06/32] Rename example.py to examples/example.py --- example.py => examples/example.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example.py => examples/example.py (100%) diff --git a/example.py b/examples/example.py similarity index 100% rename from example.py rename to examples/example.py From 34d3879cebbf0f3d98ff04da00c9b079cbc474d5 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:31 -0400 Subject: [PATCH 07/32] Rename lifecycle.py to examples/lifecycle.py --- lifecycle.py => examples/lifecycle.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename lifecycle.py => examples/lifecycle.py (100%) diff --git a/lifecycle.py b/examples/lifecycle.py similarity index 100% rename from lifecycle.py rename to examples/lifecycle.py From 8f2c6d40b10f0169acc66b34c47166ed5c0eafca Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:33 -0400 Subject: [PATCH 08/32] Rename example_logger.py to examples/example_logger.py 
--- example_logger.py => examples/example_logger.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example_logger.py => examples/example_logger.py (100%) diff --git a/example_logger.py b/examples/example_logger.py similarity index 100% rename from example_logger.py rename to examples/example_logger.py From e356f3d0afaf8cb99196c5826bf271a0f7f3fc39 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:36 -0400 Subject: [PATCH 09/32] Rename example_custom_operators.py to examples/example_custom_operators.py --- .../example_custom_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename example_custom_operators.py => examples/example_custom_operators.py (96%) diff --git a/example_custom_operators.py b/examples/example_custom_operators.py similarity index 96% rename from example_custom_operators.py rename to examples/example_custom_operators.py index 0acd5a5..88d3e6a 100644 --- a/example_custom_operators.py +++ b/examples/example_custom_operators.py @@ -71,4 +71,4 @@ def mutation_func(offspring, ga_instance): mutation_type=mutation_func) ga_instance.run() -ga_instance.plot_fitness() \ No newline at end of file +ga_instance.plot_fitness() From ffc3bf90fb53f5b84abc2ebb1a26ce7d0e588e5b Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:38 -0400 Subject: [PATCH 10/32] Rename example_clustering_3.py to examples/example_clustering_3.py --- example_clustering_3.py => examples/example_clustering_3.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example_clustering_3.py => examples/example_clustering_3.py (100%) diff --git a/example_clustering_3.py b/examples/example_clustering_3.py similarity index 100% rename from example_clustering_3.py rename to examples/example_clustering_3.py From 28c64cd9956a4680a7ce440d491fe2404b88f39c Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:42:40 -0400 Subject: [PATCH 11/32] Rename example_clustering_2.py to examples/example_clustering_2.py --- 
example_clustering_2.py => examples/example_clustering_2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename example_clustering_2.py => examples/example_clustering_2.py (97%) diff --git a/example_clustering_2.py b/examples/example_clustering_2.py similarity index 97% rename from example_clustering_2.py rename to examples/example_clustering_2.py index 43c99df..fa14bb7 100644 --- a/example_clustering_2.py +++ b/examples/example_clustering_2.py @@ -119,4 +119,4 @@ def fitness_func(ga_instance, solution, solution_idx): matplotlib.pyplot.scatter(cluster_x, cluster_y) matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5) matplotlib.pyplot.title("Clustering using PyGAD") -matplotlib.pyplot.show() \ No newline at end of file +matplotlib.pyplot.show() From c63706a83340c4203d3da6a45710abdec30ba555 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 00:45:21 -0400 Subject: [PATCH 12/32] Create a workflow YAML file. --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1 @@ + From 88013f7910420805a6ec5f6550d88c0d78265c4f Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 16:00:03 -0400 Subject: [PATCH 13/32] Rename example_travelling_salesman.ipynb to examples/example_travelling_salesman.ipynb --- .../example_travelling_salesman.ipynb | 53070 ++++++++-------- 1 file changed, 26535 insertions(+), 26535 deletions(-) rename example_travelling_salesman.ipynb => examples/example_travelling_salesman.ipynb (98%) diff --git a/example_travelling_salesman.ipynb b/examples/example_travelling_salesman.ipynb similarity index 98% rename from example_travelling_salesman.ipynb rename to examples/example_travelling_salesman.ipynb index 179b94d..f59d57e 100644 --- 
a/example_travelling_salesman.ipynb +++ b/examples/example_travelling_salesman.ipynb @@ -1,26535 +1,26535 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "95ba26f7" - }, - "source": [ - "# The Travelling Coffee Drinker - Genetic Algorithm\n", - "\n", - "Solving a travelling salesman problem for United Kingdom Starbucks Cafés" - ], - "id": "95ba26f7" - }, - { - "cell_type": "markdown", - "source": [ - "## 1. Load and transform data\n", - "\n", - "The data comes from Kaggle, which is accessed using the API wrapper.\n", - "\n", - "The transformation needed is just to filter only GB Starbucks restaurants with a valid lon/lat pair." - ], - "metadata": { - "id": "sEtmfvLsvMC2" - }, - "id": "sEtmfvLsvMC2" - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_HBlGrIyYa7G", - "outputId": "cf89fc50-e566-4fe2-f54c-178b9d6e1acd" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: pygad==2.17 in /usr/local/lib/python3.7/dist-packages (2.17.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from pygad==2.17) (1.21.6)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from pygad==2.17) (3.2.2)\n", - "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (2.8.2)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (1.4.4)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (3.0.9)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from 
matplotlib->pygad==2.17) (0.11.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->pygad==2.17) (4.1.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->pygad==2.17) (1.15.0)\n" - ] - } - ], - "source": [ - "!pip install pygad==2.17" - ], - "id": "_HBlGrIyYa7G" - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": { - "id": "buoFAtjz2duB" - }, - "outputs": [], - "source": [ - "!pip install -q kaggle" - ], - "id": "buoFAtjz2duB" - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 375, - "resources": { - "http://localhost:8080/nbextensions/google.colab/files.js": { - "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc
3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZ
SBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJ
zAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgZG8gewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwoKICAgICAgbGV0IHBlcmNlbnREb25lID0gZmlsZURhdGEuYnl0ZUxlbmd0aCA9PT0gMCA/CiAgICAgICAgICAxMDAgOgogICAgICAgICAgTWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCk7CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPSBgJHtwZXJjZW50RG9uZX0lIGRvbmVgOwoKICAgIH0gd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCk7CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", - "headers": [ - [ - "content-type", - "application/javascript" - ] - ], - "ok": true, - "status": 200, - 
"status_text": "" - } - } - }, - "id": "WYcYq9YV2gi9", - "outputId": "aef25ebb-cdaf-449f-c280-05f0faf1157f" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. Please rerun this cell to enable.\n", - " \n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "error", - "ename": "KeyboardInterrupt", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mfiles\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36mupload\u001b[0;34m()\u001b[0m\n\u001b[1;32m 39\u001b[0m \"\"\"\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0muploaded_files\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_upload_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmultiple\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;31m# Mapping from original filename to filename as saved locally.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0mlocal_filenames\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36m_upload_files\u001b[0;34m(multiple)\u001b[0m\n\u001b[1;32m 116\u001b[0m result = _output.eval_js(\n\u001b[1;32m 117\u001b[0m 'google.colab._files._uploadFiles(\"{input_id}\", \"{output_id}\")'.format(\n\u001b[0;32m--> 118\u001b[0;31m input_id=input_id, output_id=output_id))\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_collections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefaultdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/output/_js.py\u001b[0m in \u001b[0;36meval_js\u001b[0;34m(script, ignore_result, timeout_sec)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mignore_result\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_message\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0m_read_next_input_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_NOT_READY\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.025\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m if (reply.get('type') == 'colab_reply' and\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "from google.colab import files\n", - "files.upload() # upload a Kaggle JSON file to make request for data " - ], - "id": "WYcYq9YV2gi9" - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TBtskqtn29H3" - }, - "outputs": [], - "source": [ - "!mkdir kaggle " - ], - "id": "TBtskqtn29H3" - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "oK1Z1Yr12tEv" - }, - "outputs": [], - "source": [ - "!cp kaggle.json ~/.kaggle/\n", - "!chmod 600 ~/.kaggle/kaggle.json" - ], - "id": "oK1Z1Yr12tEv" - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8U2rpJ1j191n" - }, - "outputs": [], - "source": [ - "!kaggle datasets download kukuroo3/starbucks-locations-worldwide-2021-version -p /content/sample_data/ --unzip" - ], - "id": "8U2rpJ1j191n" - }, - { - "cell_type": "code", - "execution_count": 165, - "metadata": { - "id": "sTkY5cLb3age", - "colab": { - "base_uri": "https://localhost:8080/" - }, - 
"outputId": "a064aedc-8e4d-4712-d223-e35db3d1d7ae" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Index(['Unnamed: 0', 'storeNumber', 'countryCode', 'ownershipTypeCode',\n", - " 'schedule', 'slug', 'latitude', 'longitude', 'streetAddressLine1',\n", - " 'streetAddressLine2', 'streetAddressLine3', 'city',\n", - " 'countrySubdivisionCode', 'postalCode', 'currentTimeOffset',\n", - " 'windowsTimeZoneId', 'olsonTimeZoneId'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "execution_count": 165 - } - ], - "source": [ - "import pandas as pd \n", - "\n", - "# read in data and check column names \n", - "data = pd.read_csv('/content/sample_data/startbucks.csv')\n", - "data.columns" - ], - "id": "sTkY5cLb3age" - }, - { - "cell_type": "code", - "execution_count": 166, - "metadata": { - "id": "SImMYnfe3n_w", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "4ee033d9-9f66-489a-c470-b45a1a3b0ac0" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0" - ] - }, - "metadata": {}, - "execution_count": 166 - } - ], - "source": [ - "df = data[data['countryCode']=='GB']\n", - "df.reset_index(inplace=True)\n", - "\n", - "# check for invalid lon/lat pairs\n", - "len(df.dropna(subset=['latitude', 'longitude'])) - len(df)" - ], - "id": "SImMYnfe3n_w" - }, - { - "cell_type": "markdown", - "source": [ - "## 2. Exploratory analysis\n", - "\n", - "Find the distribution of cafés across the United Kingdom. \n", - "\n", - "How are restaurants distributed across towns?\n", - "What does a geospatial representation of the data look like?" 
- ], - "metadata": { - "id": "ovWqqNFIvydy" - }, - "id": "ovWqqNFIvydy" - }, - { - "cell_type": "markdown", - "source": [ - "### 2.1 Distribution of cafés by town" - ], - "metadata": { - "id": "SokLPN9j2fn1" - }, - "id": "SokLPN9j2fn1" - }, - { - "cell_type": "code", - "source": [ - "import plotly.express as px\n", - "vis = df.groupby('city').storeNumber.count().reset_index()\n", - "px.bar(vis, x='city', y='storeNumber', template='seaborn')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 0 - }, - "id": "HGOBgqnX2lIo", - "outputId": "0f19d0f2-0063-458a-cd02-987e30b3d9cf" - }, - "id": "HGOBgqnX2lIo", - "execution_count": 167, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {} - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "### 2.2 Map of cafés in the UK" - ], - "metadata": { - "id": "z9a3NPSM2lb4" - }, - "id": "z9a3NPSM2lb4" - }, - { - "cell_type": "code", - "source": [ - "import folium" - ], - "metadata": { - "id": "dAq1AQyfwN_l" - }, - "id": "dAq1AQyfwN_l", - "execution_count": 168, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")" - ], - "metadata": { - "id": "V9b0_8g_xM5K" - }, - "id": "V9b0_8g_xM5K", - "execution_count": 169, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "for _, r in df.iterrows():\n", - " folium.Marker(\n", - " [r['latitude'], r['longitude']], popup=f'{r[\"storeNumber\"]}'\n", - " ).add_to(map)" - ], - "metadata": { - "id": "ZiprKw6ExhQR" - }, - "id": "ZiprKw6ExhQR", - "execution_count": 170, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "map" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 0 - }, - "id": "iVnEBooHc0ze", - "outputId": "b6ee729b-739f-4db5-975f-12a6a2c82246" - }, - "id": "iVnEBooHc0ze", - "execution_count": 171, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "
Make this Notebook Trusted to load map: File -> Trust Notebook
" - ] - }, - "metadata": {}, - "execution_count": 171 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 3. Testing the distance methodology\n", - "\n", - "To assess how good each solution is there needs to be a measure of fitness. For the purpose of this example the distance 'as the crow flies' is used without taking into account actual road distances however this could be explored in future." - ], - "metadata": { - "id": "1sg-1A4Ih1L4" - }, - "id": "1sg-1A4Ih1L4" - }, - { - "cell_type": "code", - "source": [ - "from geopy.distance import geodesic" - ], - "metadata": { - "id": "IbG9XRkLh0Aj" - }, - "id": "IbG9XRkLh0Aj", - "execution_count": 173, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "The tested origin is the first Starbucks in the data and the destination is the second Starbucks in the dataset." - ], - "metadata": { - "id": "zgmh6b1l3liN" - }, - "id": "zgmh6b1l3liN" - }, - { - "cell_type": "code", - "execution_count": 174, - "metadata": { - "id": "TyoeO2I-4bGq" - }, - "outputs": [], - "source": [ - "origin = (df['latitude'][0], df['longitude'][0])\n", - "dest = (df['latitude'][100], df['longitude'][100])" - ], - "id": "TyoeO2I-4bGq" - }, - { - "cell_type": "markdown", - "source": [ - "The distance between the two points as the crow flies in kilometres is given below." - ], - "metadata": { - "id": "VUr7ZAzw3srI" - }, - "id": "VUr7ZAzw3srI" - }, - { - "cell_type": "code", - "execution_count": 175, - "metadata": { - "id": "zXo1I-5Q4Lwn", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "4672edf2-9142-4276-ab6a-2b35b65b8b3d" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "81.63683980420957" - ] - }, - "metadata": {}, - "execution_count": 175 - } - ], - "source": [ - "geodesic(origin, dest).kilometers" - ], - "id": "zXo1I-5Q4Lwn" - }, - { - "cell_type": "markdown", - "source": [ - "## 4. 
Preparing data structures\n", - "\n", - "The data structures needed for testing solutions are the \"genes\" or store options to select from named *genes*\n", - "\n", - "A lookup to access these genes known as *stores* \n", - "\n", - "A *check_range* which is used to check that every option is given in a solution (a key criteria in the TSP).\n" - ], - "metadata": { - "id": "GeRIsd5G378L" - }, - "id": "GeRIsd5G378L" - }, - { - "cell_type": "code", - "source": [ - "test = df.head(10)\n", - "genes = {store_num:[lat, lon] for store_num, lat, lon in zip(test['storeNumber'], test['latitude'], test['longitude'])}\n", - "stores = list(genes.keys())\n", - "check_range = [i for i in range(0, 10)]" - ], - "metadata": { - "id": "rkKStcn4iIrN" - }, - "id": "rkKStcn4iIrN", - "execution_count": 176, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 5. Defining functions \n", - "\n", - "The algorithm requires a set of functions to be pre-defined as the out of the box genetic algorithm does not support a TSP.\n", - "\n", - " 1. build_population: builds a population of chromosomes to test with proper restrictions applied\n", - " 2. fitness_func: Used to test a solution to see how well it performs, in this case the fitness_func will be assessed based on the distance as the crow flies between each successive point\n", - " 3. pmx_crossover: performs the crossover of a parent and child with proper Partially Matched Crossover (PMX) logic\n", - " 4. crossover_func: applies the crossover\n", - " 5. on_crossover: applies the mutation after crossover\n", - " 6. 
on_generation: used to print the progress and results at each generation" - ], - "metadata": { - "id": "5mKC2lKO4tRY" - }, - "id": "5mKC2lKO4tRY" - }, - { - "cell_type": "code", - "source": [ - "import random\n", - "import numpy as np\n", - "from geopy.distance import geodesic" - ], - "metadata": { - "id": "15-IiuRNljOC" - }, - "id": "15-IiuRNljOC", - "execution_count": 177, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Assess the quality or fitness of a solution so that only the fittest are selected for the next generation and to breed." - ], - "metadata": { - "id": "BhCEN9165in3" - }, - "id": "BhCEN9165in3" - }, - { - "cell_type": "code", - "source": [ - "def build_population(size, chromosome_size):\n", - " population = []\n", - " for i in range(size):\n", - " home_city = 0\n", - " added = {home_city:'Added'}\n", - " chromosome = [home_city]\n", - "\n", - " while len(chromosome) < chromosome_size:\n", - " proposed_gene = random.randint(0, chromosome_size-1)\n", - " if added.get(proposed_gene) is None:\n", - " chromosome.append(proposed_gene)\n", - " added.update({proposed_gene:'Added'})\n", - " else:\n", - " pass\n", - "\n", - " chromosome.append(home_city)\n", - "\n", - " population.append(chromosome)\n", - "\n", - " return np.array(population)" - ], - "metadata": { - "id": "BLz2sU2n78Ui" - }, - "id": "BLz2sU2n78Ui", - "execution_count": 178, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "population = build_population(100, 10)\n", - "population.shape" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "vs8CrTNV9iqe", - "outputId": "4178c652-2593-44e9-ca0d-1829f11c6d5e" - }, - "id": "vs8CrTNV9iqe", - "execution_count": 179, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(100, 11)" - ] - }, - "metadata": {}, - "execution_count": 179 - } - ] - }, - { - "cell_type": "code", - "source": [ - "def fitness_func(solution, solution_idx):\n", - " # loop 
through the length of the chromosome finding the distance between each\n", - " # gene added \n", - "\n", - " # to increment\n", - " total_dist = 0\n", - "\n", - " for gene in range(0, len(solution)):\n", - "\n", - " # get the lon lat of the two points\n", - " a = genes.get(stores[solution[gene]])\n", - " \n", - " try:\n", - " b = genes.get(stores[solution[gene + 1]])\n", - "\n", - " # find the distance (crow flies)\n", - " dist = geodesic(a, b).kilometers\n", - "\n", - " except IndexError:\n", - " dist = 0\n", - "\n", - " total_dist += dist\n", - "\n", - " # to optimise this value in the positive direction the inverse of dist is used\n", - " fitness = 1 / total_dist\n", - "\n", - " return fitness " - ], - "metadata": { - "id": "5u-5msoj-84i" - }, - "id": "5u-5msoj-84i", - "execution_count": 180, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def pmx_crossover(parent1, parent2, sequence_start, sequence_end):\n", - " # initialise a child\n", - " child = np.zeros(parent1.shape[0])\n", - "\n", - " # get the genes for parent one that are passed on to child one\n", - " parent1_to_child1_genes = parent1[sequence_start:sequence_end]\n", - "\n", - " # get the position of genes for each respective combination\n", - " parent1_to_child1 = np.isin(parent1,parent1_to_child1_genes).nonzero()[0]\n", - "\n", - " for gene in parent1_to_child1:\n", - " child[gene] = parent1[gene]\n", - "\n", - " # gene of parent 2 not in the child\n", - " genes_not_in_child = parent2[np.isin(parent2, parent1_to_child1_genes, invert=True).nonzero()[0]]\n", - " \n", - " # if the gene is not already\n", - " if genes_not_in_child.shape[0] >= 1:\n", - " for gene in genes_not_in_child:\n", - " if gene >= 1:\n", - " lookup = gene\n", - " not_in_sequence = True\n", - "\n", - " while not_in_sequence:\n", - " position_in_parent2 = np.where(parent2==lookup)[0][0]\n", - "\n", - " if position_in_parent2 in range(sequence_start, sequence_end):\n", - " lookup = parent1[position_in_parent2]\n", - 
"\n", - " else:\n", - " child[position_in_parent2] = gene\n", - " not_in_sequence = False\n", - "\n", - " return child" - ], - "metadata": { - "id": "OpbfyShQes_q" - }, - "id": "OpbfyShQes_q", - "execution_count": 181, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def crossover_func(parents, offspring_size, ga_instance):\n", - " offspring = []\n", - " idx = 0\n", - " while len(offspring) != offspring_size[0]:\n", - "\n", - " # locate the parents\n", - " parent1 = parents[idx % parents.shape[0], :].copy()\n", - " parent2 = parents[(idx + 1) % parents.shape[0], :].copy()\n", - "\n", - " # find gene sequence in parent 1 \n", - " sequence_start = random.randint(1, parent1.shape[0]-4)\n", - " sequence_end = random.randint(sequence_start, parent1.shape[0]-1)\n", - "\n", - " # perform crossover\n", - " child1 = pmx_crossover(parent1, parent2, sequence_start, sequence_end)\n", - " child2 = pmx_crossover(parent2, parent1, sequence_start, sequence_end) \n", - "\n", - " offspring.append(child1)\n", - " offspring.append(child2)\n", - "\n", - "\n", - " idx += 1\n", - "\n", - " return np.array(offspring)" - ], - "metadata": { - "id": "shgFWqH2NinO" - }, - "id": "shgFWqH2NinO", - "execution_count": 182, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "The mutation function chosen is inversion as it does not invalidate the solution." 
- ], - "metadata": { - "id": "bCjaaeofA7Bg" - }, - "id": "bCjaaeofA7Bg" - }, - { - "cell_type": "code", - "source": [ - "def mutation_func(offspring, ga_instance):\n", - "\n", - " for chromosome_idx in range(offspring.shape[0]):\n", - " # define a sequence of genes to reverse\n", - " sequence_start = random.randint(1, offspring[chromosome_idx].shape[0] - 2)\n", - " sequence_end = random.randint(sequence_start, offspring[chromosome_idx].shape[0] - 1)\n", - " \n", - " genes = offspring[chromosome_idx, sequence_start:sequence_end]\n", - "\n", - " # start at the start of the sequence assigning the reverse sequence back to the chromosome\n", - " index = 0\n", - " if len(genes) > 0:\n", - " for gene in range(sequence_start, sequence_end):\n", - "\n", - " offspring[chromosome_idx, gene] = genes[index]\n", - "\n", - " index += 1\n", - "\n", - " return offspring" - ], - "metadata": { - "id": "0kMm7J1WAsvH" - }, - "id": "0kMm7J1WAsvH", - "execution_count": 183, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Used in the genetic algorithm flow to apply the custom mutation after crossover" - ], - "metadata": { - "id": "HTeKwuPl5vVt" - }, - "id": "HTeKwuPl5vVt" - }, - { - "cell_type": "code", - "source": [ - "def on_crossover(ga_instance, offspring_crossover):\n", - " # apply mutation to ensure uniqueness \n", - " offspring_mutation = mutation_func(offspring_crossover, ga_instance)\n", - "\n", - " # save the new offspring set as the parents of the next generation\n", - " ga_instance.last_generation_offspring_mutation = offspring_mutation" - ], - "metadata": { - "id": "ucwYe4rgwLQC" - }, - "id": "ucwYe4rgwLQC", - "execution_count": 184, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Added for debugging and assessing progress by generation at runtime" - ], - "metadata": { - "id": "Xf7rgtuO532X" - }, - "id": "Xf7rgtuO532X" - }, - { - "cell_type": "code", - "source": [ - "def on_generation(ga):\n", - " print(\"Generation\", 
ga.generations_completed)\n", - " print(ga.population)" - ], - "metadata": { - "id": "eqRwVsSBMf_B" - }, - "id": "eqRwVsSBMf_B", - "execution_count": 185, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 6. Executing the algorithm\n", - "\n", - "The genetic algorithm is set up as instance and at initialisation several parameters are given. \n", - "\n", - "The algorithm then runs to find the best solution for a set number of generations." - ], - "metadata": { - "id": "C9AtXoqx58x0" - }, - "id": "C9AtXoqx58x0" - }, - { - "cell_type": "code", - "source": [ - "import pygad" - ], - "metadata": { - "id": "BrJVLZlBW07R" - }, - "id": "BrJVLZlBW07R", - "execution_count": 186, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### 6.1 Example Initialising the algorithm\n", - "\n", - "The algorithm is initialised below.\n", - "\n", - "Notable parameters include:\n", - " - The use of gene space to limit the possible genes chosen to just be those in the TSP range\n", - " - Mutations being turned off temporarily\n", - " - Implementation of custom on_ functions \n", - " - Allow duplication of genes parameter set to false to ensure any newly introduced chromosomes/chromosomes created as population is initialised have no duplicate genes" - ], - "metadata": { - "id": "6ni9VkQv6TJR" - }, - "id": "6ni9VkQv6TJR" - }, - { - "cell_type": "code", - "source": [ - "ga_instance = pygad.GA(num_generations=100,\n", - " num_parents_mating=40,\n", - " fitness_func=fitness_func,\n", - " sol_per_pop=200,\n", - " initial_population=population,\n", - " gene_space=range(0, 10),\n", - " gene_type=int,\n", - " mutation_type=mutation_func,\n", - " on_generation=on_generation,\n", - " crossover_type=crossover_func, \n", - " keep_parents=6,\n", - " mutation_probability=0.4)" - ], - "metadata": { - "id": "FfFdncf-G3Mr" - }, - "id": "FfFdncf-G3Mr", - "execution_count": 187, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### 6.2 Running the 
algorithm \n", - "\n", - "The genetic algorithm is run with a simple function call" - ], - "metadata": { - "id": "sfqme_5461A4" - }, - "id": "sfqme_5461A4" - }, - { - "cell_type": "code", - "source": [ - "ga_instance.run()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "g-3CjxWYZh3H", - "outputId": "dc1fbda5-6b22-4245-9b99-e22b0e41684f" - }, - "id": "g-3CjxWYZh3H", - "execution_count": 188, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Generation 1\n", - "[[0 3 2 ... 4 5 0]\n", - " [0 3 6 ... 1 2 0]\n", - " [0 8 3 ... 6 1 0]\n", - " ...\n", - " [0 9 5 ... 7 4 0]\n", - " [0 2 7 ... 8 6 0]\n", - " [0 3 5 ... 6 8 0]]\n", - "Generation 2\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 9 ... 8 3 0]\n", - " [0 3 2 ... 4 5 0]\n", - " ...\n", - " [0 3 6 ... 1 2 0]\n", - " [0 3 6 ... 1 2 0]\n", - " [0 3 1 ... 6 2 0]]\n", - "Generation 3\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 9 ... 8 3 0]\n", - " [0 3 2 ... 4 5 0]\n", - " [0 9 2 ... 8 3 0]]\n", - "Generation 4\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 5\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 6\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 7\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 8\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 9\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 10\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 11\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 12\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 13\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 14\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 15\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 16\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 17\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 18\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 19\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 20\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 21\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 22\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 23\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 24\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 25\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 26\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 27\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 28\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 29\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 30\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 31\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 32\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 33\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 34\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 35\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 36\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 37\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 38\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 39\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 40\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 41\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 42\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 43\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 44\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 45\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 46\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 47\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 48\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 49\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 50\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 51\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 52\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 53\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 54\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 55\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 56\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 57\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 58\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 59\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 60\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 61\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 62\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 63\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 64\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 65\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 66\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 67\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 68\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 69\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 70\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 71\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 72\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 73\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 74\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 75\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 76\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 77\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 78\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 79\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 80\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 81\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 82\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 83\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 84\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 85\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 86\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 87\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 88\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 89\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 90\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 91\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 92\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 
8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 93\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 94\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 95\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 96\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 97\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 98\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 99\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n", - "Generation 100\n", - "[[0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " ...\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]\n", - " [0 1 2 ... 8 6 0]]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 7. 
Assessing results \n", - "\n", - "The result solution can be checked and analysed using the ga_instance itself" - ], - "metadata": { - "id": "woJTWlOrYLwl" - }, - "id": "woJTWlOrYLwl" - }, - { - "cell_type": "code", - "source": [ - "solution, solution_fitness, solution_idx = ga_instance.best_solution()" - ], - "metadata": { - "id": "BZxvMCZ-aFtJ" - }, - "id": "BZxvMCZ-aFtJ", - "execution_count": 189, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "solution, solution_fitness, solution_idx = ga_instance.best_solution()\n", - "print(f'Generation of best solution: {ga_instance.best_solution_generation}')\n", - "print(\"Fitness value of the best solution = {solution_fitness}\".format(solution_fitness=solution_fitness))\n", - "print(\"Index of the best solution : {solution_idx}\".format(solution_idx=solution_idx))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IOTGnna-QXsB", - "outputId": "30d494d6-e88e-4061-a453-9a44a9409176" - }, - "id": "IOTGnna-QXsB", - "execution_count": 190, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Generation of best solution: 1\n", - "Fitness value of the best solution = 0.010681933534441102\n", - "Index of the best solution : 0\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "if ga_instance.best_solution_generation != -1:\n", - " print(\"Best fitness value reached after {best_solution_generation} generations.\".format(best_solution_generation=ga_instance.best_solution_generation))\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WsjTUVloQhez", - "outputId": "fdd41ccb-9ead-49d7-d180-ccc1dd2d13f9" - }, - "id": "WsjTUVloQhez", - "execution_count": 191, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Best fitness value reached after 1 generations.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### 7.1 Verifying a solution\n", - "\n", - "For a 
solution to be valid it needs to have:\n", - " - A maximum gene value that matches the total number of stores \n", - " - A minimum gene value of 0 \n", - " - Each gene must be unique" - ], - "metadata": { - "id": "FiLlGlILYWhw" - }, - "id": "FiLlGlILYWhw" - }, - { - "cell_type": "code", - "source": [ - "def verify_solution(solution, max_gene):\n", - " if min(solution) != 0:\n", - " print('Failed values below 0')\n", - "\n", - " if max(solution) != max_gene:\n", - " print('Failed values less than or above max possible value')\n", - "\n", - " if len(set(solution)) - len(solution) != -1:\n", - " print(len(set(solution)) - len(solution))\n", - " print('Failed solution does not contain unique values')" - ], - "metadata": { - "id": "vEhveNG4VQBF" - }, - "id": "vEhveNG4VQBF", - "execution_count": 192, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "verify_solution(solution, 9)" - ], - "metadata": { - "id": "IQg1sULKYRZe" - }, - "id": "IQg1sULKYRZe", - "execution_count": 193, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "solution" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VqjdGMGnYqz-", - "outputId": "152c6b82-4787-4776-b9e9-bc3ceb1ccec1" - }, - "id": "VqjdGMGnYqz-", - "execution_count": 194, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([0, 1, 2, 3, 4, 5, 7, 9, 8, 6, 0])" - ] - }, - "metadata": {}, - "execution_count": 194 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### 7.2 Interpreting the result \n", - "\n", - "The result sequence can be used to access latitude and longitude for each store in the solution." 
- ], - "metadata": { - "id": "5ixLDfvtY3cI" - }, - "id": "5ixLDfvtY3cI" - }, - { - "cell_type": "code", - "source": [ - "points = [genes.get(stores[id]) + [stores[id]] for id in solution]\n", - "points[:5]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tv-AjzcoQwGc", - "outputId": "a5187ac8-ea84-4dc9-992c-60cfa4dcab8a" - }, - "id": "tv-AjzcoQwGc", - "execution_count": 195, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[[51.483556, -1.557143, '9155-152277'],\n", - " [51.482387, -1.555109, '22194-218828'],\n", - " [51.481264, -1.556526, '18362-190424'],\n", - " [51.481177, -1.557422, '9136-152279'],\n", - " [51.562617, -1.798111, '47832-260044']]" - ] - }, - "metadata": {}, - "execution_count": 195 - } - ] - }, - { - "cell_type": "code", - "source": [ - "import folium \n", - "\n", - "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")\n", - "\n", - "for point in range(0, len(points)):\n", - " folium.Marker(\n", - " [points[point][0], points[point][1]], popup=f'{points[point][2]}'\n", - " ).add_to(map)\n", - "\n", - " try:\n", - " folium.PolyLine([(points[point][0], points[point][1]), \n", - " (points[point+1][0], points[point+1][1])],\n", - " color='red',\n", - " weight=5,\n", - " opacity=0.8).add_to(map)\n", - "\n", - " except IndexError:\n", - " pass\n", - " " - ], - "metadata": { - "id": "Tq0hTc5cstWX" - }, - "id": "Tq0hTc5cstWX", - "execution_count": 196, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "map" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 0 - }, - "id": "ebkOuHleuGU1", - "outputId": "801c457d-49d9-452d-9c4b-872aef5816d4" - }, - "id": "ebkOuHleuGU1", - "execution_count": 197, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "
Make this Notebook Trusted to load map: File -> Trust Notebook
" - ] - }, - "metadata": {}, - "execution_count": 197 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "The map shows the shortest path that has been found. So that the travelling coffee drinker can maximise the time on coffee and minimise the time on travelling.\n", - "\n", - "Now the algorithm can be scaled up for the whole of the UK, or tailored to just one town. An example of the solution scaled to the UK is given below." - ], - "metadata": { - "id": "5Sz8ykeEYDS3" - }, - "id": "5Sz8ykeEYDS3" - }, - { - "cell_type": "markdown", - "source": [ - "## 8. Scaling up the solution\n", - "\n", - "This is where the fun begins!" - ], - "metadata": { - "id": "gscT4SjxZ9h4" - }, - "id": "gscT4SjxZ9h4" - }, - { - "cell_type": "code", - "source": [ - "df = df[df['city'] == 'London']\n", - "genes = {store_num:[lat, lon] for store_num, lat, lon in zip(df['storeNumber'], df['latitude'], df['longitude'])}\n", - "stores = list(genes.keys())\n", - "len(stores)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yYY3-gP1aFkH", - "outputId": "907e2a7c-3292-4409-e83e-4db372f09a45" - }, - "id": "yYY3-gP1aFkH", - "execution_count": 157, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "165" - ] - }, - "metadata": {}, - "execution_count": 157 - } - ] - }, - { - "cell_type": "code", - "source": [ - "population = build_population(200, 165)\n", - "len(population[0])" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "oykOz7l4KhJE", - "outputId": "fe7fdabc-a909-40c4-e69a-daba8169b07c" - }, - "id": "oykOz7l4KhJE", - "execution_count": 155, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "166" - ] - }, - "metadata": {}, - "execution_count": 155 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### 8.1 Building the final algorithm\n", - "\n", - "The code to build the algorithm has to be re-run with the above data structures 
altered." - ], - "metadata": { - "id": "ioy8EGjEbnBJ" - }, - "id": "ioy8EGjEbnBJ" - }, - { - "cell_type": "code", - "source": [ - "def fitness_func(solution, solution_idx):\n", - " # loop through the length of the chromosome finding the distance between each\n", - " # gene added \n", - "\n", - " # to increment\n", - " total_dist = 0\n", - "\n", - " for gene in range(0, len(solution)):\n", - "\n", - " # get the lon lat of the two points\n", - " a = genes.get(stores[solution[gene]])\n", - " \n", - " try:\n", - " b = genes.get(stores[solution[gene + 1]])\n", - "\n", - " # find the distance (crow flies)\n", - " dist = geodesic(a, b).kilometers\n", - "\n", - " except IndexError:\n", - " dist = 0\n", - "\n", - " total_dist += dist\n", - "\n", - " # to optimise this value in the positive direction the inverse of dist is used\n", - " fitness = 1 / total_dist\n", - "\n", - " return fitness " - ], - "metadata": { - "id": "uBfcikkma5hP" - }, - "id": "uBfcikkma5hP", - "execution_count": 108, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def pmx_crossover(parent1, parent2, sequence_start, sequence_end):\n", - " # initialise a child\n", - " child = np.zeros(parent1.shape[0])\n", - "\n", - " # get the genes for parent one that are passed on to child one\n", - " parent1_to_child1_genes = parent1[sequence_start:sequence_end]\n", - "\n", - " # get the position of genes for each respective combination\n", - " parent1_to_child1 = np.isin(parent1,parent1_to_child1_genes).nonzero()[0]\n", - "\n", - " for gene in parent1_to_child1:\n", - " child[gene] = parent1[gene]\n", - "\n", - " # gene of parent 2 not in the child\n", - " genes_not_in_child = parent2[np.isin(parent2, parent1_to_child1_genes, invert=True).nonzero()[0]]\n", - " \n", - " if genes_not_in_child.shape[0] >= 1:\n", - " for gene in genes_not_in_child:\n", - " if gene >= 1:\n", - " lookup = gene\n", - " not_in_sequence = True\n", - "\n", - " while not_in_sequence:\n", - " position_in_parent2 = 
np.where(parent2==lookup)[0][0]\n", - "\n", - " if position_in_parent2 in range(sequence_start, sequence_end):\n", - " lookup = parent1[position_in_parent2]\n", - "\n", - " else:\n", - " child[position_in_parent2] = gene\n", - " not_in_sequence = False\n", - "\n", - " return child" - ], - "metadata": { - "id": "FuOkiStta7Pz" - }, - "id": "FuOkiStta7Pz", - "execution_count": 109, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def crossover_func(parents, offspring_size, ga_instance):\n", - " offspring = []\n", - " idx = 0\n", - " while len(offspring) != offspring_size[0]:\n", - "\n", - " # locate the parents\n", - " parent1 = parents[idx % parents.shape[0], :].copy()\n", - " parent2 = parents[(idx + 1) % parents.shape[0], :].copy()\n", - "\n", - " # find gene sequence in parent 1 \n", - " sequence_start = random.randint(1, parent1.shape[0]-4)\n", - " sequence_end = random.randint(sequence_start, parent1.shape[0]-1)\n", - "\n", - " # perform crossover\n", - " child1 = pmx_crossover(parent1, parent2, sequence_start, sequence_end)\n", - " child2 = pmx_crossover(parent2, parent1, sequence_start, sequence_end)\n", - " \n", - "\n", - " offspring.append(child1)\n", - " offspring.append(child2)\n", - "\n", - " idx += 1\n", - "\n", - " return np.array(offspring)" - ], - "metadata": { - "id": "O10ZgScUa_bj" - }, - "id": "O10ZgScUa_bj", - "execution_count": 130, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def mutation_func(offspring, ga_instance):\n", - "\n", - " for chromosome_idx in range(offspring.shape[0]):\n", - " # define a sequence of genes to reverse\n", - " sequence_start = random.randint(1, offspring[chromosome_idx].shape[0] - 2)\n", - " sequence_end = random.randint(sequence_start, offspring[chromosome_idx].shape[0] - 1)\n", - " \n", - " genes = offspring[chromosome_idx, sequence_start:sequence_end]\n", - "\n", - " # start at the start of the sequence assigning the reverse sequence back to the chromosome\n", - " index = 0\n", - 
" if len(genes) > 0:\n", - " for gene in range(sequence_start, sequence_end):\n", - "\n", - " offspring[chromosome_idx, gene] = genes[index]\n", - "\n", - " index += 1\n", - "\n", - " return offspring" - ], - "metadata": { - "id": "mLLY7Ub4K_Y5" - }, - "id": "mLLY7Ub4K_Y5", - "execution_count": 144, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def on_crossover(ga_instance, offspring_crossover):\n", - " # apply mutation to ensure uniqueness \n", - " offspring_mutation = mutation_func(offspring_crossover, ga_instance)\n", - "\n", - " # save the new offspring set as the parents of the next generation\n", - " ga_instance.last_generation_offspring_mutation = offspring_mutation" - ], - "metadata": { - "id": "QLtP6in4LFSw" - }, - "id": "QLtP6in4LFSw", - "execution_count": 126, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def on_generation(ga):\n", - " print(\"Generation\", ga.generations_completed)\n", - " print(ga.population)" - ], - "metadata": { - "id": "SnR2LaDJLGRj" - }, - "id": "SnR2LaDJLGRj", - "execution_count": 127, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "ga_instance = pygad.GA(num_generations=100,\n", - " num_parents_mating=40,\n", - " fitness_func=fitness_func,\n", - " sol_per_pop=200,\n", - " initial_population=population,\n", - " gene_space=range(0, 165),\n", - " gene_type=int,\n", - " mutation_type=mutation_func,\n", - " on_generation=on_generation,\n", - " crossover_type=crossover_func, \n", - " keep_parents=6,\n", - " mutation_probability=0.4)" - ], - "metadata": { - "id": "j2J5jlh9bDxR" - }, - "id": "j2J5jlh9bDxR", - "execution_count": 145, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "ga_instance.run()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "lhW7JkMAbS6E", - "outputId": "c6130b2a-1e6b-4b7d-c2d6-f891b178fef9" - }, - "id": "lhW7JkMAbS6E", - "execution_count": 146, - "outputs": [ - { - "output_type": "stream", - "name": 
"stdout", - "text": [ - "Generation 1\n", - "[[ 0 1 111 ... 127 108 0]\n", - " [ 0 62 141 ... 26 161 0]\n", - " [ 0 137 155 ... 158 3 0]\n", - " ...\n", - " [ 0 142 162 ... 2 159 0]\n", - " [ 0 161 159 ... 112 66 0]\n", - " [ 0 152 108 ... 72 58 0]]\n", - "Generation 2\n", - "[[ 0 1 111 ... 127 108 0]\n", - " [ 0 1 111 ... 127 108 0]\n", - " [ 0 62 141 ... 26 161 0]\n", - " ...\n", - " [ 0 137 155 ... 135 76 0]\n", - " [ 0 137 155 ... 158 3 0]\n", - " [ 0 96 40 ... 135 5 0]]\n", - "Generation 3\n", - "[[ 0 1 145 ... 26 94 0]\n", - " [ 0 1 111 ... 127 108 0]\n", - " [ 0 1 111 ... 127 108 0]\n", - " ...\n", - " [ 0 89 155 ... 158 32 0]\n", - " [ 0 1 110 ... 127 94 0]\n", - " [ 0 96 40 ... 64 90 0]]\n", - "Generation 4\n", - "[[ 0 1 56 ... 26 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 110 ... 127 94 0]\n", - " ...\n", - " [ 0 1 111 ... 127 108 0]\n", - " [ 0 1 145 ... 26 22 0]\n", - " [ 0 1 77 ... 127 142 0]]\n", - "Generation 5\n", - "[[ 0 1 111 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 56 ... 127 81 0]\n", - " ...\n", - " [ 0 1 60 ... 127 118 0]\n", - " [ 0 1 154 ... 127 7 0]\n", - " [ 0 1 164 ... 127 94 0]]\n", - "Generation 6\n", - "[[ 0 1 56 ... 26 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 93 ... 127 108 0]\n", - " [ 0 1 30 ... 127 108 0]\n", - " [ 0 1 111 ... 127 108 0]]\n", - "Generation 7\n", - "[[ 0 1 56 ... 127 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 56 ... 26 81 0]\n", - " [ 0 1 56 ... 127 81 0]]\n", - "Generation 8\n", - "[[ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 26 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 56 ... 127 94 0]]\n", - "Generation 9\n", - "[[ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 164 ... 
127 94 0]\n", - " [ 0 1 33 ... 26 94 0]\n", - " [ 0 1 110 ... 127 108 0]]\n", - "Generation 10\n", - "[[ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 26 108 0]]\n", - "Generation 11\n", - "[[ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 81 0]]\n", - "Generation 12\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " [ 0 1 164 ... 127 81 0]]\n", - "Generation 13\n", - "[[ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 81 0]]\n", - "Generation 14\n", - "[[ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " ...\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 108 0]]\n", - "Generation 15\n", - "[[ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 94 0]\n", - " [ 0 1 164 ... 127 108 0]]\n", - "Generation 16\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 81 0]\n", - " ...\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 94 0]]\n", - "Generation 17\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 81 0]\n", - " [ 0 1 164 ... 127 108 0]\n", - " [ 0 1 164 ... 127 108 0]]\n", - "Generation 18\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 108 0]]\n", - "Generation 19\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 20\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 21\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 22\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 23\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 24\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 25\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 26\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]]\n", - "Generation 27\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 28\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 29\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 30\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 31\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 32\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 33\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 34\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 35\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 36\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 37\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 38\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 39\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 40\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 41\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 42\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 43\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]]\n", - "Generation 44\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 45\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 46\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 47\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 48\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 49\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 50\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 51\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 52\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 53\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 54\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 55\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 56\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 57\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 58\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 59\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 60\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]]\n", - "Generation 61\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 62\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 63\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 64\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 65\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 66\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 67\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 68\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 69\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 70\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 71\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 72\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 73\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 74\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 75\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 76\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 77\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]]\n", - "Generation 78\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 79\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 80\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 81\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 82\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 83\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 84\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 85\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 86\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 87\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 88\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 89\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 90\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 91\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 92\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 93\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 94\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 
127 119 0]]\n", - "Generation 95\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 96\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 97\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 98\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 99\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n", - "Generation 100\n", - "[[ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " ...\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]\n", - " [ 0 1 164 ... 127 119 0]]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 8.2 Evaluating the final algorithm \n", - "\n", - "The overall solution can now be assessed." 
- ], - "metadata": { - "id": "KJY5PcFabWdL" - }, - "id": "KJY5PcFabWdL" - }, - { - "cell_type": "code", - "source": [ - "solution, solution_fitness, solution_idx = ga_instance.best_solution()\n", - "print(f'Generation of best solution: {ga_instance.best_solution_generation}')\n", - "print(\"Fitness value of the best solution = {solution_fitness}\".format(solution_fitness=solution_fitness))\n", - "print(\"Index of the best solution : {solution_idx}\".format(solution_idx=solution_idx))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yxGGtFfE3j_1", - "outputId": "d5b68eaa-167f-47c8-aadd-10ecb3eb080c" - }, - "id": "yxGGtFfE3j_1", - "execution_count": 147, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Generation of best solution: 25\n", - "Fitness value of the best solution = 0.0010087414431375688\n", - "Index of the best solution : 0\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "verify_solution(solution, len(stores))\n", - "solution" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NKvGE63abjmU", - "outputId": "75249196-afeb-4ee6-9b96-9674ea8493c9" - }, - "id": "NKvGE63abjmU", - "execution_count": 148, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Failed values less than or above max possible value\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([ 0, 1, 164, 19, 77, 23, 10, 9, 154, 158, 157, 26, 22,\n", - " 92, 42, 137, 75, 143, 149, 12, 100, 85, 86, 124, 128, 135,\n", - " 147, 54, 24, 3, 58, 123, 153, 51, 29, 69, 20, 110, 59,\n", - " 95, 113, 115, 121, 91, 36, 64, 65, 32, 53, 35, 105, 52,\n", - " 21, 34, 133, 109, 47, 71, 98, 106, 131, 89, 108, 56, 152,\n", - " 150, 7, 38, 43, 94, 8, 132, 155, 4, 16, 84, 90, 27,\n", - " 2, 144, 151, 39, 45, 159, 125, 79, 156, 40, 6, 74, 139,\n", - " 141, 145, 76, 104, 50, 37, 129, 130, 72, 142, 97, 25, 93,\n", - " 134, 126, 138, 
140, 148, 120, 96, 28, 160, 116, 18, 112, 31,\n", - " 41, 55, 63, 73, 122, 162, 161, 163, 66, 107, 17, 87, 103,\n", - " 80, 81, 88, 83, 82, 14, 33, 11, 46, 61, 60, 136, 146,\n", - " 15, 70, 44, 48, 67, 78, 111, 13, 62, 30, 118, 114, 99,\n", - " 102, 5, 68, 49, 57, 101, 117, 127, 119, 0])" - ] - }, - "metadata": {}, - "execution_count": 148 - } - ] - }, - { - "cell_type": "code", - "source": [ - "points = [genes.get(stores[id]) + [stores[id]] for id in solution]\n", - "points[:5]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "G8bOm7PGcPx_", - "outputId": "f78d25f6-3679-489b-c533-2971ac389941" - }, - "id": "G8bOm7PGcPx_", - "execution_count": 150, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[[51.877854, -0.376379, '12851-253386'],\n", - " [51.877854, -0.376289, '7187-253385'],\n", - " [51.655847, -0.277039, '47771-259784'],\n", - " [51.51402, -0.13925, '12021-10341'],\n", - " [51.54541, -0.16269, '12158-22023']]" - ] - }, - "metadata": {}, - "execution_count": 150 - } - ] - }, - { - "cell_type": "code", - "source": [ - "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")\n", - "\n", - "for point in range(0, len(points)):\n", - " folium.Marker(\n", - " [points[point][0], points[point][1]], popup=f'{points[point][2]}'\n", - " ).add_to(map)\n", - "\n", - " try:\n", - " folium.PolyLine([(points[point][0], points[point][1]), \n", - " (points[point+1][0], points[point+1][1])],\n", - " color='red',\n", - " weight=5,\n", - " opacity=0.8).add_to(map)\n", - "\n", - " except IndexError:\n", - " pass" - ], - "metadata": { - "id": "_KtJJkkvcY-E" - }, - "id": "_KtJJkkvcY-E", - "execution_count": 151, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "map" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 531 - }, - "id": "PMH_yECHcaH8", - "outputId": "708aca8b-c6c9-4008-cc54-e14bb37b180f" - }, - "id": 
"PMH_yECHcaH8", - "execution_count": 152, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "
Make this Notebook Trusted to load map: File -> Trust Notebook
" - ] - }, - "metadata": {}, - "execution_count": 152 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 10. Total result \n", - "\n", - "The total resulting distance around London after optimising the solution is:" - ], - "metadata": { - "id": "4ek_Es2DcbXU" - }, - "id": "4ek_Es2DcbXU" - }, - { - "cell_type": "code", - "source": [ - "def distance(solution):\n", - " # loop through the length of the chromosome finding the distance between each\n", - " # gene added \n", - "\n", - " # to increment\n", - " total_dist = 0\n", - "\n", - " for gene in range(0, len(solution)):\n", - "\n", - " # get the lon lat of the two points\n", - " a = genes.get(stores[solution[gene]])\n", - " \n", - " try:\n", - " b = genes.get(stores[solution[gene + 1]])\n", - "\n", - " # find the distance (crow flies)\n", - " dist = geodesic(a, b).kilometers\n", - "\n", - " except IndexError:\n", - " dist = 0\n", - "\n", - " \n", - " total_dist += dist\n", - "\n", - " # to optimise this value in the positive direction the inverse of dist is used\n", - "\n", - " return total_dist " - ], - "metadata": { - "id": "SiCFxx7WcwxV" - }, - "id": "SiCFxx7WcwxV", - "execution_count": 153, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "distance(solution)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nds6G5tmnJ8K", - "outputId": "fe4ccfc6-a5c0-4e53-c0cd-acfe62bd6052" - }, - "id": "nds6G5tmnJ8K", - "execution_count": 154, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "991.3343075204886" - ] - }, - "metadata": {}, - "execution_count": 154 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Which is not too bad for 975 cups of joe. 
🥤" - ], - "metadata": { - "id": "bukLealWcxOz" - }, - "id": "bukLealWcxOz" - } - ], - "metadata": { - "colab": { - "provenance": [], - "collapsed_sections": [ - "sEtmfvLsvMC2", - "1sg-1A4Ih1L4" - ], - "toc_visible": true - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "95ba26f7" + }, + "source": [ + "# The Travelling Coffee Drinker - Genetic Algorithm\n", + "\n", + "Solving a travelling salesman problem for United Kingdom Starbucks Cafés" + ], + "id": "95ba26f7" + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Load and transform data\n", + "\n", + "The data comes from Kaggle, which is accessed using the API wrapper.\n", + "\n", + "The transformation needed is just to filter only GB Starbucks restaurants with a valid lon/lat pair." 
+ ], + "metadata": { + "id": "sEtmfvLsvMC2" + }, + "id": "sEtmfvLsvMC2" + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_HBlGrIyYa7G", + "outputId": "cf89fc50-e566-4fe2-f54c-178b9d6e1acd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: pygad==2.17 in /usr/local/lib/python3.7/dist-packages (2.17.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from pygad==2.17) (1.21.6)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from pygad==2.17) (3.2.2)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (2.8.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (1.4.4)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (3.0.9)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pygad==2.17) (0.11.0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->pygad==2.17) (4.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->pygad==2.17) (1.15.0)\n" + ] + } + ], + "source": [ + "!pip install pygad==2.17" + ], + "id": "_HBlGrIyYa7G" + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "id": "buoFAtjz2duB" + }, + "outputs": [], + "source": [ + "!pip install -q kaggle" + ], + "id": "buoFAtjz2duB" + }, + { + "cell_type": "code", + 
"execution_count": 77, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 375, + "resources": { + "http://localhost:8080/nbextensions/google.colab/files.js": { + "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludW
Uob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZW
F0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaW
QgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgZG8gewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwoKICAgICAgbGV0IHBlcmNlbnREb25lID0gZmlsZURhdGEuYnl0ZUxlbmd0aCA9PT0gMCA/CiAgICAgICAgICAxMDAgOgogICAgICAgICAgTWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCk7CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPSBgJHtwZXJjZW50RG9uZX0lIGRvbmVgOwoKICAgIH0gd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCk7CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", + "headers": [ + [ + "content-type", + "application/javascript" + ] + ], + "ok": true, + "status": 200, + "status_text": "" + } + } + }, + "id": "WYcYq9YV2gi9", + "outputId": "aef25ebb-cdaf-449f-c280-05f0faf1157f" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "error", + "ename": "KeyboardInterrupt", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mfiles\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36mupload\u001b[0;34m()\u001b[0m\n\u001b[1;32m 39\u001b[0m \"\"\"\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0muploaded_files\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_upload_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmultiple\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;31m# Mapping from original filename to filename as saved locally.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0mlocal_filenames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/files.py\u001b[0m in \u001b[0;36m_upload_files\u001b[0;34m(multiple)\u001b[0m\n\u001b[1;32m 116\u001b[0m result = _output.eval_js(\n\u001b[1;32m 117\u001b[0m 
'google.colab._files._uploadFiles(\"{input_id}\", \"{output_id}\")'.format(\n\u001b[0;32m--> 118\u001b[0;31m input_id=input_id, output_id=output_id))\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mfiles\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_collections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefaultdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/output/_js.py\u001b[0m in \u001b[0;36meval_js\u001b[0;34m(script, ignore_result, timeout_sec)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mignore_result\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 40\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_message\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_read_next_input_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_NOT_READY\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.025\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m if (reply.get('type') == 'colab_reply' and\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "from google.colab import files\n", + "files.upload() # upload a Kaggle JSON file to make request for data " + ], + "id": "WYcYq9YV2gi9" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TBtskqtn29H3" + }, + "outputs": [], + "source": [ + "!mkdir kaggle " + ], + "id": "TBtskqtn29H3" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oK1Z1Yr12tEv" + }, + "outputs": [], + "source": [ + "!cp kaggle.json ~/.kaggle/\n", + "!chmod 600 ~/.kaggle/kaggle.json" + ], + "id": "oK1Z1Yr12tEv" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8U2rpJ1j191n" + }, + "outputs": [], + "source": [ + "!kaggle datasets download kukuroo3/starbucks-locations-worldwide-2021-version -p /content/sample_data/ --unzip" + ], + "id": "8U2rpJ1j191n" + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "id": "sTkY5cLb3age", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a064aedc-8e4d-4712-d223-e35db3d1d7ae" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['Unnamed: 0', 'storeNumber', 'countryCode', 'ownershipTypeCode',\n", + " 'schedule', 'slug', 'latitude', 'longitude', 'streetAddressLine1',\n", + " 
'streetAddressLine2', 'streetAddressLine3', 'city',\n", + " 'countrySubdivisionCode', 'postalCode', 'currentTimeOffset',\n", + " 'windowsTimeZoneId', 'olsonTimeZoneId'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 165 + } + ], + "source": [ + "import pandas as pd \n", + "\n", + "# read in data and check column names \n", + "data = pd.read_csv('/content/sample_data/startbucks.csv')\n", + "data.columns" + ], + "id": "sTkY5cLb3age" + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": { + "id": "SImMYnfe3n_w", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4ee033d9-9f66-489a-c470-b45a1a3b0ac0" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0" + ] + }, + "metadata": {}, + "execution_count": 166 + } + ], + "source": [ + "df = data[data['countryCode']=='GB']\n", + "df.reset_index(inplace=True)\n", + "\n", + "# check for invalid lon/lat pairs\n", + "len(df.dropna(subset=['latitude', 'longitude'])) - len(df)" + ], + "id": "SImMYnfe3n_w" + }, + { + "cell_type": "markdown", + "source": [ + "## 2. Exploratory analysis\n", + "\n", + "Find the distribution of cafés across the United Kingdom. \n", + "\n", + "How are restaurants distributed across towns?\n", + "What does a geospatial representation of the data look like?" 
+ ], + "metadata": { + "id": "ovWqqNFIvydy" + }, + "id": "ovWqqNFIvydy" + }, + { + "cell_type": "markdown", + "source": [ + "### 2.1 Distribution of cafés by town" + ], + "metadata": { + "id": "SokLPN9j2fn1" + }, + "id": "SokLPN9j2fn1" + }, + { + "cell_type": "code", + "source": [ + "import plotly.express as px\n", + "vis = df.groupby('city').storeNumber.count().reset_index()\n", + "px.bar(vis, x='city', y='storeNumber', template='seaborn')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "HGOBgqnX2lIo", + "outputId": "0f19d0f2-0063-458a-cd02-987e30b3d9cf" + }, + "id": "HGOBgqnX2lIo", + "execution_count": 167, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "### 2.2 Map of cafés in the UK" + ], + "metadata": { + "id": "z9a3NPSM2lb4" + }, + "id": "z9a3NPSM2lb4" + }, + { + "cell_type": "code", + "source": [ + "import folium" + ], + "metadata": { + "id": "dAq1AQyfwN_l" + }, + "id": "dAq1AQyfwN_l", + "execution_count": 168, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")" + ], + "metadata": { + "id": "V9b0_8g_xM5K" + }, + "id": "V9b0_8g_xM5K", + "execution_count": 169, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "for _, r in df.iterrows():\n", + " folium.Marker(\n", + " [r['latitude'], r['longitude']], popup=f'{r[\"storeNumber\"]}'\n", + " ).add_to(map)" + ], + "metadata": { + "id": "ZiprKw6ExhQR" + }, + "id": "ZiprKw6ExhQR", + "execution_count": 170, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "map" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "iVnEBooHc0ze", + "outputId": "b6ee729b-739f-4db5-975f-12a6a2c82246" + }, + "id": "iVnEBooHc0ze", + "execution_count": 171, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ] + }, + "metadata": {}, + "execution_count": 171 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 3. Testing the distance methodology\n", + "\n", + "To assess how good each solution is, there needs to be a measure of fitness. For the purpose of this example, the distance 'as the crow flies' is used without taking into account actual road distances; however, this could be explored in future." + ], + "metadata": { + "id": "1sg-1A4Ih1L4" + }, + "id": "1sg-1A4Ih1L4" + }, + { + "cell_type": "code", + "source": [ + "from geopy.distance import geodesic" + ], + "metadata": { + "id": "IbG9XRkLh0Aj" + }, + "id": "IbG9XRkLh0Aj", + "execution_count": 173, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The tested origin is the first Starbucks in the data (index 0) and the destination is the Starbucks at index 100 of the dataset." + ], + "metadata": { + "id": "zgmh6b1l3liN" + }, + "id": "zgmh6b1l3liN" + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": { + "id": "TyoeO2I-4bGq" + }, + "outputs": [], + "source": [ + "origin = (df['latitude'][0], df['longitude'][0])\n", + "dest = (df['latitude'][100], df['longitude'][100])" + ], + "id": "TyoeO2I-4bGq" + }, + { + "cell_type": "markdown", + "source": [ + "The distance between the two points as the crow flies in kilometres is given below." + ], + "metadata": { + "id": "VUr7ZAzw3srI" + }, + "id": "VUr7ZAzw3srI" + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": { + "id": "zXo1I-5Q4Lwn", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4672edf2-9142-4276-ab6a-2b35b65b8b3d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "81.63683980420957" + ] + }, + "metadata": {}, + "execution_count": 175 + } + ], + "source": [ + "geodesic(origin, dest).kilometers" + ], + "id": "zXo1I-5Q4Lwn" + }, + { + "cell_type": "markdown", + "source": [ + "## 4. 
Preparing data structures\n", + "\n", + "The data structures needed for testing solutions are the \"genes\", or store options to select from, named *genes*\n", + "\n", + "A lookup to access these genes known as *stores* \n", + "\n", + "A *check_range* which is used to check that every option is given in a solution (a key criterion in the TSP).\n" + ], + "metadata": { + "id": "GeRIsd5G378L" + }, + "id": "GeRIsd5G378L" + }, + { + "cell_type": "code", + "source": [ + "test = df.head(10)\n", + "genes = {store_num:[lat, lon] for store_num, lat, lon in zip(test['storeNumber'], test['latitude'], test['longitude'])}\n", + "stores = list(genes.keys())\n", + "check_range = [i for i in range(0, 10)]" + ], + "metadata": { + "id": "rkKStcn4iIrN" + }, + "id": "rkKStcn4iIrN", + "execution_count": 176, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 5. Defining functions \n", + "\n", + "The algorithm requires a set of functions to be pre-defined as the out-of-the-box genetic algorithm does not support a TSP.\n", + "\n", + " 1. build_population: builds a population of chromosomes to test with proper restrictions applied\n", + " 2. fitness_func: Used to test a solution to see how well it performs; in this case the fitness_func will be assessed based on the distance as the crow flies between each successive point\n", + " 3. pmx_crossover: performs the crossover of two parents to produce a child with proper Partially Matched Crossover (PMX) logic\n", + " 4. crossover_func: applies the crossover\n", + " 5. on_crossover: applies the mutation after crossover\n", + " 6. 
on_generation: used to print the progress and results at each generation" + ], + "metadata": { + "id": "5mKC2lKO4tRY" + }, + "id": "5mKC2lKO4tRY" + }, + { + "cell_type": "code", + "source": [ + "import random\n", + "import numpy as np\n", + "from geopy.distance import geodesic" + ], + "metadata": { + "id": "15-IiuRNljOC" + }, + "id": "15-IiuRNljOC", + "execution_count": 177, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Assess the quality or fitness of a solution so that only the fittest are selected for the next generation and to breed." + ], + "metadata": { + "id": "BhCEN9165in3" + }, + "id": "BhCEN9165in3" + }, + { + "cell_type": "code", + "source": [ + "def build_population(size, chromosome_size):\n", + " population = []\n", + " for i in range(size):\n", + " home_city = 0\n", + " added = {home_city:'Added'}\n", + " chromosome = [home_city]\n", + "\n", + " while len(chromosome) < chromosome_size:\n", + " proposed_gene = random.randint(0, chromosome_size-1)\n", + " if added.get(proposed_gene) is None:\n", + " chromosome.append(proposed_gene)\n", + " added.update({proposed_gene:'Added'})\n", + " else:\n", + " pass\n", + "\n", + " chromosome.append(home_city)\n", + "\n", + " population.append(chromosome)\n", + "\n", + " return np.array(population)" + ], + "metadata": { + "id": "BLz2sU2n78Ui" + }, + "id": "BLz2sU2n78Ui", + "execution_count": 178, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "population = build_population(100, 10)\n", + "population.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vs8CrTNV9iqe", + "outputId": "4178c652-2593-44e9-ca0d-1829f11c6d5e" + }, + "id": "vs8CrTNV9iqe", + "execution_count": 179, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(100, 11)" + ] + }, + "metadata": {}, + "execution_count": 179 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def fitness_func(solution, solution_idx):\n", + " # loop 
through the length of the chromosome finding the distance between each\n", + " # gene added \n", + "\n", + " # to increment\n", + " total_dist = 0\n", + "\n", + " for gene in range(0, len(solution)):\n", + "\n", + " # get the lon lat of the two points\n", + " a = genes.get(stores[solution[gene]])\n", + " \n", + " try:\n", + " b = genes.get(stores[solution[gene + 1]])\n", + "\n", + " # find the distance (crow flies)\n", + " dist = geodesic(a, b).kilometers\n", + "\n", + " except IndexError:\n", + " dist = 0\n", + "\n", + " total_dist += dist\n", + "\n", + " # to optimise this value in the positive direction the inverse of dist is used\n", + " fitness = 1 / total_dist\n", + "\n", + " return fitness " + ], + "metadata": { + "id": "5u-5msoj-84i" + }, + "id": "5u-5msoj-84i", + "execution_count": 180, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def pmx_crossover(parent1, parent2, sequence_start, sequence_end):\n", + " # initialise a child\n", + " child = np.zeros(parent1.shape[0])\n", + "\n", + " # get the genes for parent one that are passed on to child one\n", + " parent1_to_child1_genes = parent1[sequence_start:sequence_end]\n", + "\n", + " # get the position of genes for each respective combination\n", + " parent1_to_child1 = np.isin(parent1,parent1_to_child1_genes).nonzero()[0]\n", + "\n", + " for gene in parent1_to_child1:\n", + " child[gene] = parent1[gene]\n", + "\n", + " # gene of parent 2 not in the child\n", + " genes_not_in_child = parent2[np.isin(parent2, parent1_to_child1_genes, invert=True).nonzero()[0]]\n", + " \n", + " # if the gene is not already\n", + " if genes_not_in_child.shape[0] >= 1:\n", + " for gene in genes_not_in_child:\n", + " if gene >= 1:\n", + " lookup = gene\n", + " not_in_sequence = True\n", + "\n", + " while not_in_sequence:\n", + " position_in_parent2 = np.where(parent2==lookup)[0][0]\n", + "\n", + " if position_in_parent2 in range(sequence_start, sequence_end):\n", + " lookup = parent1[position_in_parent2]\n", + 
"\n", + " else:\n", + " child[position_in_parent2] = gene\n", + " not_in_sequence = False\n", + "\n", + " return child" + ], + "metadata": { + "id": "OpbfyShQes_q" + }, + "id": "OpbfyShQes_q", + "execution_count": 181, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def crossover_func(parents, offspring_size, ga_instance):\n", + " offspring = []\n", + " idx = 0\n", + " while len(offspring) != offspring_size[0]:\n", + "\n", + " # locate the parents\n", + " parent1 = parents[idx % parents.shape[0], :].copy()\n", + " parent2 = parents[(idx + 1) % parents.shape[0], :].copy()\n", + "\n", + " # find gene sequence in parent 1 \n", + " sequence_start = random.randint(1, parent1.shape[0]-4)\n", + " sequence_end = random.randint(sequence_start, parent1.shape[0]-1)\n", + "\n", + " # perform crossover\n", + " child1 = pmx_crossover(parent1, parent2, sequence_start, sequence_end)\n", + " child2 = pmx_crossover(parent2, parent1, sequence_start, sequence_end) \n", + "\n", + " offspring.append(child1)\n", + " offspring.append(child2)\n", + "\n", + "\n", + " idx += 1\n", + "\n", + " return np.array(offspring)" + ], + "metadata": { + "id": "shgFWqH2NinO" + }, + "id": "shgFWqH2NinO", + "execution_count": 182, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The mutation function chosen is inversion as it does not invalidate the solution." 
+ ], + "metadata": { + "id": "bCjaaeofA7Bg" + }, + "id": "bCjaaeofA7Bg" + }, + { + "cell_type": "code", + "source": [ + "def mutation_func(offspring, ga_instance):\n", + "\n", + " for chromosome_idx in range(offspring.shape[0]):\n", + " # define a sequence of genes to reverse\n", + " sequence_start = random.randint(1, offspring[chromosome_idx].shape[0] - 2)\n", + " sequence_end = random.randint(sequence_start, offspring[chromosome_idx].shape[0] - 1)\n", + " \n", + " genes = offspring[chromosome_idx, sequence_start:sequence_end]\n", + "\n", + " # start at the start of the sequence assigning the reverse sequence back to the chromosome\n", + " index = 0\n", + " if len(genes) > 0:\n", + " for gene in range(sequence_start, sequence_end):\n", + "\n", + " offspring[chromosome_idx, gene] = genes[index]\n", + "\n", + " index += 1\n", + "\n", + " return offspring" + ], + "metadata": { + "id": "0kMm7J1WAsvH" + }, + "id": "0kMm7J1WAsvH", + "execution_count": 183, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Used in the genetic algorithm flow to apply the custom mutation after crossover" + ], + "metadata": { + "id": "HTeKwuPl5vVt" + }, + "id": "HTeKwuPl5vVt" + }, + { + "cell_type": "code", + "source": [ + "def on_crossover(ga_instance, offspring_crossover):\n", + " # apply mutation to ensure uniqueness \n", + " offspring_mutation = mutation_func(offspring_crossover, ga_instance)\n", + "\n", + " # save the new offspring set as the parents of the next generation\n", + " ga_instance.last_generation_offspring_mutation = offspring_mutation" + ], + "metadata": { + "id": "ucwYe4rgwLQC" + }, + "id": "ucwYe4rgwLQC", + "execution_count": 184, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Added for debugging and assessing progress by generation at runtime" + ], + "metadata": { + "id": "Xf7rgtuO532X" + }, + "id": "Xf7rgtuO532X" + }, + { + "cell_type": "code", + "source": [ + "def on_generation(ga):\n", + " print(\"Generation\", 
ga.generations_completed)\n", + " print(ga.population)" + ], + "metadata": { + "id": "eqRwVsSBMf_B" + }, + "id": "eqRwVsSBMf_B", + "execution_count": 185, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## 6. Executing the algorithm\n", + "\n", + "The genetic algorithm is set up as an instance and at initialisation several parameters are given. \n", + "\n", + "The algorithm then runs to find the best solution for a set number of generations." + ], + "metadata": { + "id": "C9AtXoqx58x0" + }, + "id": "C9AtXoqx58x0" + }, + { + "cell_type": "code", + "source": [ + "import pygad" + ], + "metadata": { + "id": "BrJVLZlBW07R" + }, + "id": "BrJVLZlBW07R", + "execution_count": 186, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### 6.1 Example Initialising the algorithm\n", + "\n", + "The algorithm is initialised below.\n", + "\n", + "Notable parameters include:\n", + " - The use of gene space to limit the possible genes chosen to just be those in the TSP range\n", + " - A custom mutation function (`mutation_func`) supplied through `mutation_type`, applied with a mutation probability of 0.4\n", + " - Implementation of custom on_ functions \n", + " - Allow duplication of genes parameter intended to be set to false to ensure any newly introduced chromosomes/chromosomes created as population is initialised have no duplicate genes (note: this parameter is not passed in the call below, so it still needs to be added)" + ], + "metadata": { + "id": "6ni9VkQv6TJR" + }, + "id": "6ni9VkQv6TJR" + }, + { + "cell_type": "code", + "source": [ + "ga_instance = pygad.GA(num_generations=100,\n", + " num_parents_mating=40,\n", + " fitness_func=fitness_func,\n", + " sol_per_pop=200,\n", + " initial_population=population,\n", + " gene_space=range(0, 10),\n", + " gene_type=int,\n", + " mutation_type=mutation_func,\n", + " on_generation=on_generation,\n", + " crossover_type=crossover_func, \n", + " keep_parents=6,\n", + " mutation_probability=0.4)" + ], + "metadata": { + "id": "FfFdncf-G3Mr" + }, + "id": "FfFdncf-G3Mr", + "execution_count": 187, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### 6.2 Running the 
algorithm \n", + "\n", + "The genetic algorithm is run with a simple function call" + ], + "metadata": { + "id": "sfqme_5461A4" + }, + "id": "sfqme_5461A4" + }, + { + "cell_type": "code", + "source": [ + "ga_instance.run()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g-3CjxWYZh3H", + "outputId": "dc1fbda5-6b22-4245-9b99-e22b0e41684f" + }, + "id": "g-3CjxWYZh3H", + "execution_count": 188, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Generation 1\n", + "[[0 3 2 ... 4 5 0]\n", + " [0 3 6 ... 1 2 0]\n", + " [0 8 3 ... 6 1 0]\n", + " ...\n", + " [0 9 5 ... 7 4 0]\n", + " [0 2 7 ... 8 6 0]\n", + " [0 3 5 ... 6 8 0]]\n", + "Generation 2\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 9 ... 8 3 0]\n", + " [0 3 2 ... 4 5 0]\n", + " ...\n", + " [0 3 6 ... 1 2 0]\n", + " [0 3 6 ... 1 2 0]\n", + " [0 3 1 ... 6 2 0]]\n", + "Generation 3\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 9 ... 8 3 0]\n", + " [0 3 2 ... 4 5 0]\n", + " [0 9 2 ... 8 3 0]]\n", + "Generation 4\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 5\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 6\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 7\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 8\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 9\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 10\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 11\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 12\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 13\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 14\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 15\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 16\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 17\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 18\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 19\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 20\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 21\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 22\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 23\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 24\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 25\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 26\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 27\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 28\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 29\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 30\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 31\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 32\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 33\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 34\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 35\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 36\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 37\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 38\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 39\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 40\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 41\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 42\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 43\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 44\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 45\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 46\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 47\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 48\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 49\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 50\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 51\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 52\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 53\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 54\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 55\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 56\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 57\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 58\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 59\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 60\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 61\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 62\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 63\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 64\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 65\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 66\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 67\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 68\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 69\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 70\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 71\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 72\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 73\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 74\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 75\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 76\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 77\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 78\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 79\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 80\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 81\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 82\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 83\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 84\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 85\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 86\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 87\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 88\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 89\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 90\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 91\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 92\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 
8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 93\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 94\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 95\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 96\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 97\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 98\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 99\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n", + "Generation 100\n", + "[[0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " ...\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]\n", + " [0 1 2 ... 8 6 0]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 7. 
Assessing results \n", + "\n", + "The result solution can be checked and analysed using the ga_instance itself" + ], + "metadata": { + "id": "woJTWlOrYLwl" + }, + "id": "woJTWlOrYLwl" + }, + { + "cell_type": "code", + "source": [ + "solution, solution_fitness, solution_idx = ga_instance.best_solution()" + ], + "metadata": { + "id": "BZxvMCZ-aFtJ" + }, + "id": "BZxvMCZ-aFtJ", + "execution_count": 189, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "solution, solution_fitness, solution_idx = ga_instance.best_solution()\n", + "print(f'Generation of best solution: {ga_instance.best_solution_generation}')\n", + "print(\"Fitness value of the best solution = {solution_fitness}\".format(solution_fitness=solution_fitness))\n", + "print(\"Index of the best solution : {solution_idx}\".format(solution_idx=solution_idx))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IOTGnna-QXsB", + "outputId": "30d494d6-e88e-4061-a453-9a44a9409176" + }, + "id": "IOTGnna-QXsB", + "execution_count": 190, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Generation of best solution: 1\n", + "Fitness value of the best solution = 0.010681933534441102\n", + "Index of the best solution : 0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "if ga_instance.best_solution_generation != -1:\n", + " print(\"Best fitness value reached after {best_solution_generation} generations.\".format(best_solution_generation=ga_instance.best_solution_generation))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WsjTUVloQhez", + "outputId": "fdd41ccb-9ead-49d7-d180-ccc1dd2d13f9" + }, + "id": "WsjTUVloQhez", + "execution_count": 191, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Best fitness value reached after 1 generations.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 7.1 Verifying a solution\n", + "\n", + "For a 
solution to be valid it needs to have:\n", + " - A maximum gene value equal to the highest store index \n", + " - A minimum gene value of 0 \n", + " - No repeated genes, apart from store 0, which appears at both the start and the end of the route" + ], + "metadata": { + "id": "FiLlGlILYWhw" + }, + "id": "FiLlGlILYWhw" + }, + { + "cell_type": "code", + "source": [ + "def verify_solution(solution, max_gene):\n", + " if min(solution) != 0:\n", + " print('Failed values below 0')\n", + "\n", + " if max(solution) != max_gene:\n", + " print('Failed values less than or above max possible value')\n", + "\n", + " if len(set(solution)) - len(solution) != -1:\n", + " print(len(set(solution)) - len(solution))\n", + " print('Failed solution does not contain unique values')" + ], + "metadata": { + "id": "vEhveNG4VQBF" + }, + "id": "vEhveNG4VQBF", + "execution_count": 192, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "verify_solution(solution, 9)" + ], + "metadata": { + "id": "IQg1sULKYRZe" + }, + "id": "IQg1sULKYRZe", + "execution_count": 193, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "solution" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VqjdGMGnYqz-", + "outputId": "152c6b82-4787-4776-b9e9-bc3ceb1ccec1" + }, + "id": "VqjdGMGnYqz-", + "execution_count": 194, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([0, 1, 2, 3, 4, 5, 7, 9, 8, 6, 0])" + ] + }, + "metadata": {}, + "execution_count": 194 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 7.2 Interpreting the result \n", + "\n", + "The result sequence can be used to access latitude and longitude for each store in the solution."
+ ], + "metadata": { + "id": "5ixLDfvtY3cI" + }, + "id": "5ixLDfvtY3cI" + }, + { + "cell_type": "code", + "source": [ + "points = [genes.get(stores[id]) + [stores[id]] for id in solution]\n", + "points[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tv-AjzcoQwGc", + "outputId": "a5187ac8-ea84-4dc9-992c-60cfa4dcab8a" + }, + "id": "tv-AjzcoQwGc", + "execution_count": 195, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[[51.483556, -1.557143, '9155-152277'],\n", + " [51.482387, -1.555109, '22194-218828'],\n", + " [51.481264, -1.556526, '18362-190424'],\n", + " [51.481177, -1.557422, '9136-152279'],\n", + " [51.562617, -1.798111, '47832-260044']]" + ] + }, + "metadata": {}, + "execution_count": 195 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import folium \n", + "\n", + "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")\n", + "\n", + "for point in range(0, len(points)):\n", + " folium.Marker(\n", + " [points[point][0], points[point][1]], popup=f'{points[point][2]}'\n", + " ).add_to(map)\n", + "\n", + " try:\n", + " folium.PolyLine([(points[point][0], points[point][1]), \n", + " (points[point+1][0], points[point+1][1])],\n", + " color='red',\n", + " weight=5,\n", + " opacity=0.8).add_to(map)\n", + "\n", + " except IndexError:\n", + " pass\n", + " " + ], + "metadata": { + "id": "Tq0hTc5cstWX" + }, + "id": "Tq0hTc5cstWX", + "execution_count": 196, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "map" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "ebkOuHleuGU1", + "outputId": "801c457d-49d9-452d-9c4b-872aef5816d4" + }, + "id": "ebkOuHleuGU1", + "execution_count": 197, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ] + }, + "metadata": {}, + "execution_count": 197 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The map shows the shortest path that has been found, so that the travelling coffee drinker can maximise the time spent on coffee and minimise the time spent travelling.\n", + "\n", + "Now the algorithm can be scaled up for the whole of the UK, or tailored to just one town. An example of the solution scaled up to all of the stores in London is given below." + ], + "metadata": { + "id": "5Sz8ykeEYDS3" + }, + "id": "5Sz8ykeEYDS3" + }, + { + "cell_type": "markdown", + "source": [ + "## 8. Scaling up the solution\n", + "\n", + "This is where the fun begins!" + ], + "metadata": { + "id": "gscT4SjxZ9h4" + }, + "id": "gscT4SjxZ9h4" + }, + { + "cell_type": "code", + "source": [ + "df = df[df['city'] == 'London']\n", + "genes = {store_num:[lat, lon] for store_num, lat, lon in zip(df['storeNumber'], df['latitude'], df['longitude'])}\n", + "stores = list(genes.keys())\n", + "len(stores)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yYY3-gP1aFkH", + "outputId": "907e2a7c-3292-4409-e83e-4db372f09a45" + }, + "id": "yYY3-gP1aFkH", + "execution_count": 157, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "165" + ] + }, + "metadata": {}, + "execution_count": 157 + } + ] + }, + { + "cell_type": "code", + "source": [ + "population = build_population(200, 165)\n", + "len(population[0])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oykOz7l4KhJE", + "outputId": "fe7fdabc-a909-40c4-e69a-daba8169b07c" + }, + "id": "oykOz7l4KhJE", + "execution_count": 155, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "166" + ] + }, + "metadata": {}, + "execution_count": 155 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 8.1 Building the final algorithm\n", + "\n", + "The code to build the algorithm has to be re-run with the above data structures
altered." + ], + "metadata": { + "id": "ioy8EGjEbnBJ" + }, + "id": "ioy8EGjEbnBJ" + }, + { + "cell_type": "code", + "source": [ + "def fitness_func(solution, solution_idx):\n", + " # loop through the length of the chromosome finding the distance between each\n", + " # gene added \n", + "\n", + " # to increment\n", + " total_dist = 0\n", + "\n", + " for gene in range(0, len(solution)):\n", + "\n", + " # get the lon lat of the two points\n", + " a = genes.get(stores[solution[gene]])\n", + " \n", + " try:\n", + " b = genes.get(stores[solution[gene + 1]])\n", + "\n", + " # find the distance (crow flies)\n", + " dist = geodesic(a, b).kilometers\n", + "\n", + " except IndexError:\n", + " dist = 0\n", + "\n", + " total_dist += dist\n", + "\n", + " # to optimise this value in the positive direction the inverse of dist is used\n", + " fitness = 1 / total_dist\n", + "\n", + " return fitness " + ], + "metadata": { + "id": "uBfcikkma5hP" + }, + "id": "uBfcikkma5hP", + "execution_count": 108, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def pmx_crossover(parent1, parent2, sequence_start, sequence_end):\n", + " # initialise a child\n", + " child = np.zeros(parent1.shape[0])\n", + "\n", + " # get the genes for parent one that are passed on to child one\n", + " parent1_to_child1_genes = parent1[sequence_start:sequence_end]\n", + "\n", + " # get the position of genes for each respective combination\n", + " parent1_to_child1 = np.isin(parent1,parent1_to_child1_genes).nonzero()[0]\n", + "\n", + " for gene in parent1_to_child1:\n", + " child[gene] = parent1[gene]\n", + "\n", + " # gene of parent 2 not in the child\n", + " genes_not_in_child = parent2[np.isin(parent2, parent1_to_child1_genes, invert=True).nonzero()[0]]\n", + " \n", + " if genes_not_in_child.shape[0] >= 1:\n", + " for gene in genes_not_in_child:\n", + " if gene >= 1:\n", + " lookup = gene\n", + " not_in_sequence = True\n", + "\n", + " while not_in_sequence:\n", + " position_in_parent2 = 
np.where(parent2==lookup)[0][0]\n", + "\n", + " if position_in_parent2 in range(sequence_start, sequence_end):\n", + " lookup = parent1[position_in_parent2]\n", + "\n", + " else:\n", + " child[position_in_parent2] = gene\n", + " not_in_sequence = False\n", + "\n", + " return child" + ], + "metadata": { + "id": "FuOkiStta7Pz" + }, + "id": "FuOkiStta7Pz", + "execution_count": 109, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def crossover_func(parents, offspring_size, ga_instance):\n", + " offspring = []\n", + " idx = 0\n", + " while len(offspring) != offspring_size[0]:\n", + "\n", + " # locate the parents\n", + " parent1 = parents[idx % parents.shape[0], :].copy()\n", + " parent2 = parents[(idx + 1) % parents.shape[0], :].copy()\n", + "\n", + " # find gene sequence in parent 1 \n", + " sequence_start = random.randint(1, parent1.shape[0]-4)\n", + " sequence_end = random.randint(sequence_start, parent1.shape[0]-1)\n", + "\n", + " # perform crossover\n", + " child1 = pmx_crossover(parent1, parent2, sequence_start, sequence_end)\n", + " child2 = pmx_crossover(parent2, parent1, sequence_start, sequence_end)\n", + " \n", + "\n", + " offspring.append(child1)\n", + " offspring.append(child2)\n", + "\n", + " idx += 1\n", + "\n", + " return np.array(offspring)" + ], + "metadata": { + "id": "O10ZgScUa_bj" + }, + "id": "O10ZgScUa_bj", + "execution_count": 130, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def mutation_func(offspring, ga_instance):\n", + "\n", + " for chromosome_idx in range(offspring.shape[0]):\n", + " # define a sequence of genes to reverse\n", + " sequence_start = random.randint(1, offspring[chromosome_idx].shape[0] - 2)\n", + " sequence_end = random.randint(sequence_start, offspring[chromosome_idx].shape[0] - 1)\n", + " \n", + " genes = offspring[chromosome_idx, sequence_start:sequence_end]\n", + "\n", + " # start at the start of the sequence assigning the reverse sequence back to the chromosome\n", + " index = 0\n", + 
" if len(genes) > 0:\n", + " for gene in range(sequence_start, sequence_end):\n", + "\n", + " offspring[chromosome_idx, gene] = genes[index]\n", + "\n", + " index += 1\n", + "\n", + " return offspring" + ], + "metadata": { + "id": "mLLY7Ub4K_Y5" + }, + "id": "mLLY7Ub4K_Y5", + "execution_count": 144, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def on_crossover(ga_instance, offspring_crossover):\n", + " # apply mutation to ensure uniqueness \n", + " offspring_mutation = mutation_func(offspring_crossover, ga_instance)\n", + "\n", + " # save the new offspring set as the parents of the next generation\n", + " ga_instance.last_generation_offspring_mutation = offspring_mutation" + ], + "metadata": { + "id": "QLtP6in4LFSw" + }, + "id": "QLtP6in4LFSw", + "execution_count": 126, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def on_generation(ga):\n", + " print(\"Generation\", ga.generations_completed)\n", + " print(ga.population)" + ], + "metadata": { + "id": "SnR2LaDJLGRj" + }, + "id": "SnR2LaDJLGRj", + "execution_count": 127, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "ga_instance = pygad.GA(num_generations=100,\n", + " num_parents_mating=40,\n", + " fitness_func=fitness_func,\n", + " sol_per_pop=200,\n", + " initial_population=population,\n", + " gene_space=range(0, 165),\n", + " gene_type=int,\n", + " mutation_type=mutation_func,\n", + " on_generation=on_generation,\n", + " crossover_type=crossover_func, \n", + " keep_parents=6,\n", + " mutation_probability=0.4)" + ], + "metadata": { + "id": "j2J5jlh9bDxR" + }, + "id": "j2J5jlh9bDxR", + "execution_count": 145, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "ga_instance.run()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lhW7JkMAbS6E", + "outputId": "c6130b2a-1e6b-4b7d-c2d6-f891b178fef9" + }, + "id": "lhW7JkMAbS6E", + "execution_count": 146, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "Generation 1\n", + "[[ 0 1 111 ... 127 108 0]\n", + " [ 0 62 141 ... 26 161 0]\n", + " [ 0 137 155 ... 158 3 0]\n", + " ...\n", + " [ 0 142 162 ... 2 159 0]\n", + " [ 0 161 159 ... 112 66 0]\n", + " [ 0 152 108 ... 72 58 0]]\n", + "Generation 2\n", + "[[ 0 1 111 ... 127 108 0]\n", + " [ 0 1 111 ... 127 108 0]\n", + " [ 0 62 141 ... 26 161 0]\n", + " ...\n", + " [ 0 137 155 ... 135 76 0]\n", + " [ 0 137 155 ... 158 3 0]\n", + " [ 0 96 40 ... 135 5 0]]\n", + "Generation 3\n", + "[[ 0 1 145 ... 26 94 0]\n", + " [ 0 1 111 ... 127 108 0]\n", + " [ 0 1 111 ... 127 108 0]\n", + " ...\n", + " [ 0 89 155 ... 158 32 0]\n", + " [ 0 1 110 ... 127 94 0]\n", + " [ 0 96 40 ... 64 90 0]]\n", + "Generation 4\n", + "[[ 0 1 56 ... 26 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 110 ... 127 94 0]\n", + " ...\n", + " [ 0 1 111 ... 127 108 0]\n", + " [ 0 1 145 ... 26 22 0]\n", + " [ 0 1 77 ... 127 142 0]]\n", + "Generation 5\n", + "[[ 0 1 111 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 56 ... 127 81 0]\n", + " ...\n", + " [ 0 1 60 ... 127 118 0]\n", + " [ 0 1 154 ... 127 7 0]\n", + " [ 0 1 164 ... 127 94 0]]\n", + "Generation 6\n", + "[[ 0 1 56 ... 26 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 93 ... 127 108 0]\n", + " [ 0 1 30 ... 127 108 0]\n", + " [ 0 1 111 ... 127 108 0]]\n", + "Generation 7\n", + "[[ 0 1 56 ... 127 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 56 ... 26 81 0]\n", + " [ 0 1 56 ... 127 81 0]]\n", + "Generation 8\n", + "[[ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 26 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 56 ... 127 94 0]]\n", + "Generation 9\n", + "[[ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 164 ... 
127 94 0]\n", + " [ 0 1 33 ... 26 94 0]\n", + " [ 0 1 110 ... 127 108 0]]\n", + "Generation 10\n", + "[[ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 26 108 0]]\n", + "Generation 11\n", + "[[ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 81 0]]\n", + "Generation 12\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " [ 0 1 164 ... 127 81 0]]\n", + "Generation 13\n", + "[[ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 81 0]]\n", + "Generation 14\n", + "[[ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " ...\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 108 0]]\n", + "Generation 15\n", + "[[ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 94 0]\n", + " [ 0 1 164 ... 127 108 0]]\n", + "Generation 16\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 81 0]\n", + " ...\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 94 0]]\n", + "Generation 17\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 81 0]\n", + " [ 0 1 164 ... 127 108 0]\n", + " [ 0 1 164 ... 127 108 0]]\n", + "Generation 18\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 108 0]]\n", + "Generation 19\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 20\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 21\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 22\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 23\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 24\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 25\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 26\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]]\n", + "Generation 27\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 28\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 29\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 30\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 31\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 32\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 33\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 34\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 35\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 36\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 37\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 38\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 39\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 40\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 41\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 42\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 43\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]]\n", + "Generation 44\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 45\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 46\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 47\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 48\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 49\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 50\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 51\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 52\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 53\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 54\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 55\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 56\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 57\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 58\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 59\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 60\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]]\n", + "Generation 61\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 62\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 63\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 64\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 65\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 66\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 67\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 68\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 69\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 70\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 71\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 72\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 73\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 74\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 75\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 76\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 77\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]]\n", + "Generation 78\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 79\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 80\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 81\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 82\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 83\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 84\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 85\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 86\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 87\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 88\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 89\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 90\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 91\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 92\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 93\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 94\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 
127 119 0]]\n", + "Generation 95\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 96\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 97\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 98\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 99\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n", + "Generation 100\n", + "[[ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " ...\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]\n", + " [ 0 1 164 ... 127 119 0]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 8.2 Evaluating the final algorithm \n", + "\n", + "The overall solution can now be assessed." 
+ ], + "metadata": { + "id": "KJY5PcFabWdL" + }, + "id": "KJY5PcFabWdL" + }, + { + "cell_type": "code", + "source": [ + "solution, solution_fitness, solution_idx = ga_instance.best_solution()\n", + "print(f'Generation of best solution: {ga_instance.best_solution_generation}')\n", + "print(\"Fitness value of the best solution = {solution_fitness}\".format(solution_fitness=solution_fitness))\n", + "print(\"Index of the best solution : {solution_idx}\".format(solution_idx=solution_idx))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yxGGtFfE3j_1", + "outputId": "d5b68eaa-167f-47c8-aadd-10ecb3eb080c" + }, + "id": "yxGGtFfE3j_1", + "execution_count": 147, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Generation of best solution: 25\n", + "Fitness value of the best solution = 0.0010087414431375688\n", + "Index of the best solution : 0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "verify_solution(solution, len(stores))\n", + "solution" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NKvGE63abjmU", + "outputId": "75249196-afeb-4ee6-9b96-9674ea8493c9" + }, + "id": "NKvGE63abjmU", + "execution_count": 148, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Failed values less than or above max possible value\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 0, 1, 164, 19, 77, 23, 10, 9, 154, 158, 157, 26, 22,\n", + " 92, 42, 137, 75, 143, 149, 12, 100, 85, 86, 124, 128, 135,\n", + " 147, 54, 24, 3, 58, 123, 153, 51, 29, 69, 20, 110, 59,\n", + " 95, 113, 115, 121, 91, 36, 64, 65, 32, 53, 35, 105, 52,\n", + " 21, 34, 133, 109, 47, 71, 98, 106, 131, 89, 108, 56, 152,\n", + " 150, 7, 38, 43, 94, 8, 132, 155, 4, 16, 84, 90, 27,\n", + " 2, 144, 151, 39, 45, 159, 125, 79, 156, 40, 6, 74, 139,\n", + " 141, 145, 76, 104, 50, 37, 129, 130, 72, 142, 97, 25, 93,\n", + " 134, 126, 138, 
140, 148, 120, 96, 28, 160, 116, 18, 112, 31,\n", + " 41, 55, 63, 73, 122, 162, 161, 163, 66, 107, 17, 87, 103,\n", + " 80, 81, 88, 83, 82, 14, 33, 11, 46, 61, 60, 136, 146,\n", + " 15, 70, 44, 48, 67, 78, 111, 13, 62, 30, 118, 114, 99,\n", + " 102, 5, 68, 49, 57, 101, 117, 127, 119, 0])" + ] + }, + "metadata": {}, + "execution_count": 148 + } + ] + }, + { + "cell_type": "code", + "source": [ + "points = [genes.get(stores[id]) + [stores[id]] for id in solution]\n", + "points[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "G8bOm7PGcPx_", + "outputId": "f78d25f6-3679-489b-c533-2971ac389941" + }, + "id": "G8bOm7PGcPx_", + "execution_count": 150, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[[51.877854, -0.376379, '12851-253386'],\n", + " [51.877854, -0.376289, '7187-253385'],\n", + " [51.655847, -0.277039, '47771-259784'],\n", + " [51.51402, -0.13925, '12021-10341'],\n", + " [51.54541, -0.16269, '12158-22023']]" + ] + }, + "metadata": {}, + "execution_count": 150 + } + ] + }, + { + "cell_type": "code", + "source": [ + "map = folium.Map(location=[51.509685, -0.118092], zoom_start=6, tiles=\"stamentoner\")\n", + "\n", + "for point in range(0, len(points)):\n", + " folium.Marker(\n", + " [points[point][0], points[point][1]], popup=f'{points[point][2]}'\n", + " ).add_to(map)\n", + "\n", + " try:\n", + " folium.PolyLine([(points[point][0], points[point][1]), \n", + " (points[point+1][0], points[point+1][1])],\n", + " color='red',\n", + " weight=5,\n", + " opacity=0.8).add_to(map)\n", + "\n", + " except IndexError:\n", + " pass" + ], + "metadata": { + "id": "_KtJJkkvcY-E" + }, + "id": "_KtJJkkvcY-E", + "execution_count": 151, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "map" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 531 + }, + "id": "PMH_yECHcaH8", + "outputId": "708aca8b-c6c9-4008-cc54-e14bb37b180f" + }, + "id": 
"PMH_yECHcaH8", + "execution_count": 152, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ] + }, + "metadata": {}, + "execution_count": 152 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 10. Total result \n", + "\n", + "The total resulting distance around London after optimising the solution is:" + ], + "metadata": { + "id": "4ek_Es2DcbXU" + }, + "id": "4ek_Es2DcbXU" + }, + { + "cell_type": "code", + "source": [ + "def distance(solution):\n", + " # loop through the length of the chromosome finding the distance between each\n", + " # gene added \n", + "\n", + " # to increment\n", + " total_dist = 0\n", + "\n", + " for gene in range(0, len(solution)):\n", + "\n", + " # get the lon lat of the two points\n", + " a = genes.get(stores[solution[gene]])\n", + " \n", + " try:\n", + " b = genes.get(stores[solution[gene + 1]])\n", + "\n", + " # find the distance (crow flies)\n", + " dist = geodesic(a, b).kilometers\n", + "\n", + " except IndexError:\n", + " dist = 0\n", + "\n", + " \n", + " total_dist += dist\n", + "\n", + " # to optimise this value in the positive direction the inverse of dist is used\n", + "\n", + " return total_dist " + ], + "metadata": { + "id": "SiCFxx7WcwxV" + }, + "id": "SiCFxx7WcwxV", + "execution_count": 153, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "distance(solution)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nds6G5tmnJ8K", + "outputId": "fe4ccfc6-a5c0-4e53-c0cd-acfe62bd6052" + }, + "id": "nds6G5tmnJ8K", + "execution_count": 154, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "991.3343075204886" + ] + }, + "metadata": {}, + "execution_count": 154 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Which is not too bad for 975 cups of joe. 
🥤" + ], + "metadata": { + "id": "bukLealWcxOz" + }, + "id": "bukLealWcxOz" + } + ], + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "sEtmfvLsvMC2", + "1sg-1A4Ih1L4" + ], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 5 +} From b42bcd0d5edf45507a8e4bab628d7f9f6124be7d Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 18:30:53 -0400 Subject: [PATCH 14/32] Metadata pyproject.toml File Create a metadata TOML pyproject.toml file for the project. --- pyproject.toml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4849c4a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[project] +name = "pygad" +version = "3.0.0" +description = "PyGAD: A Python Library for Building the Genetic Algorithm and Training Machine Learning Algoithms (Keras & PyTorch)." 
+readme = {file = "README.md", content-type = "text/markdown"} +requires-python = ">=3" +license = {file = "LICENSE"} +authors = [ + {name = "Ahmed Gad", email = "ahmed.f.gad@gmail.com"}, +] +maintainers = [ +{name = "Ahmed Gad", email = "ahmed.f.gad@gmail.com"} +] +classifiers = [ + "Programming Language :: Python" +] +keywords = ["genetic algorithm", "optimization", "natural evolution", "pygad", "machine learning", "deep learning", "neural networks", "tensorflow", "keras", "pytorch"] +dependencies = [ + "numpy", + "matplotlib", + "cloudpickle", +] + +[project.urls] +homepage = "https://github.com/ahmedfgad/GeneticAlgorithmPython" +documentation = "https://pygad.readthedocs.io" +github_repository = "https://github.com/ahmedfgad/GeneticAlgorithmPython" +pypi_project = "https://pypi.org/project/pygad" +conda_forge_project = "https://anaconda.org/conda-forge/pygad" +donation_stripe = "https://donate.stripe.com/eVa5kO866elKgM0144" +donation_open_collective = "https://opencollective.com/pygad" +donation_paypal = "http://paypal.me/ahmedfgad" + +[project.optional-dependencies] +deep_learning = ["keras", "torch"] From 97f3c1652218cb0aeae962e0e38d1ed81962d1da Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 18:57:59 -0400 Subject: [PATCH 15/32] Add more classifiers --- pyproject.toml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4849c4a..fc1ddb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,9 +12,23 @@ maintainers = [ {name = "Ahmed Gad", email = "ahmed.f.gad@gmail.com"} ] classifiers = [ - "Programming Language :: Python" + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Natural Language :: English", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + 
"Topic :: Software Development", + "Topic :: Utilities", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Other Audience" ] -keywords = ["genetic algorithm", "optimization", "natural evolution", "pygad", "machine learning", "deep learning", "neural networks", "tensorflow", "keras", "pytorch"] +keywords = ["genetic algorithm", "GA", "optimization", "evolutionary algorithm", "natural evolution", "pygad", "machine learning", "deep learning", "neural networks", "tensorflow", "keras", "pytorch"] dependencies = [ "numpy", "matplotlib", From 2b79ddb535f351828146750df3b8d0d73731c310 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 19:05:34 -0400 Subject: [PATCH 16/32] Edit links --- pyproject.toml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fc1ddb4..31a3fbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,14 +36,14 @@ dependencies = [ ] [project.urls] -homepage = "https://github.com/ahmedfgad/GeneticAlgorithmPython" -documentation = "https://pygad.readthedocs.io" -github_repository = "https://github.com/ahmedfgad/GeneticAlgorithmPython" -pypi_project = "https://pypi.org/project/pygad" -conda_forge_project = "https://anaconda.org/conda-forge/pygad" -donation_stripe = "https://donate.stripe.com/eVa5kO866elKgM0144" -donation_open_collective = "https://opencollective.com/pygad" -donation_paypal = "http://paypal.me/ahmedfgad" +"Homepage" = "https://github.com/ahmedfgad/GeneticAlgorithmPython" +"Documentation" = "https://pygad.readthedocs.io" +"GitHub Repository" = "https://github.com/ahmedfgad/GeneticAlgorithmPython" +"PyPI Project" = "https://pypi.org/project/pygad" +"Conda Forge Project" = "https://anaconda.org/conda-forge/pygad" +"Donation Stripe" = "https://donate.stripe.com/eVa5kO866elKgM0144" +"Donation Open Collective" = 
"https://opencollective.com/pygad" +"Donation Paypal" = "http://paypal.me/ahmedfgad" [project.optional-dependencies] deep_learning = ["keras", "torch"] From ab972476109cd1d52333907b2ac4a324e583ac65 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sun, 16 Apr 2023 19:07:22 -0400 Subject: [PATCH 17/32] Add [build-system] table --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 31a3fbf..2e20768 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + [project] name = "pygad" version = "3.0.0" From a1ab6aaeaff1c1001964e1950a29064072bb32c2 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 20 Apr 2023 09:48:26 -0400 Subject: [PATCH 18/32] Add tests --- pyproject.toml | 9 ++ tests/example_clustering_2.py | 122 +++++++++++++++++++++++++++ tests/example_clustering_3.py | 134 ++++++++++++++++++++++++++++++ tests/example_custom_operators.py | 74 +++++++++++++++++ tests/example_logger.py | 45 ++++++++++ tests/lifecycle.py | 48 +++++++++++ tests/test_example.py | 38 +++++++++ 7 files changed, 470 insertions(+) create mode 100644 tests/example_clustering_2.py create mode 100644 tests/example_clustering_3.py create mode 100644 tests/example_custom_operators.py create mode 100644 tests/example_logger.py create mode 100644 tests/lifecycle.py create mode 100644 tests/test_example.py diff --git a/pyproject.toml b/pyproject.toml index 2e20768..cc30824 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,3 +51,12 @@ dependencies = [ [project.optional-dependencies] deep_learning = ["keras", "torch"] + +# PyTest Configuration. Later, PyTest will support the [tool.pytest] table. 
+[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q" +testpaths = [ + "tests", + "integration", +] diff --git a/tests/example_clustering_2.py b/tests/example_clustering_2.py new file mode 100644 index 0000000..877e318 --- /dev/null +++ b/tests/example_clustering_2.py @@ -0,0 +1,122 @@ +import numpy +import matplotlib.pyplot +import pygad + +cluster1_num_samples = 10 +cluster1_x1_start = 0 +cluster1_x1_end = 5 +cluster1_x2_start = 2 +cluster1_x2_end = 6 +cluster1_x1 = numpy.random.random(size=(cluster1_num_samples)) +cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start +cluster1_x2 = numpy.random.random(size=(cluster1_num_samples)) +cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start + +cluster2_num_samples = 10 +cluster2_x1_start = 10 +cluster2_x1_end = 15 +cluster2_x2_start = 8 +cluster2_x2_end = 12 +cluster2_x1 = numpy.random.random(size=(cluster2_num_samples)) +cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start +cluster2_x2 = numpy.random.random(size=(cluster2_num_samples)) +cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start + +c1 = numpy.array([cluster1_x1, cluster1_x2]).T +c2 = numpy.array([cluster2_x1, cluster2_x2]).T + +data = numpy.concatenate((c1, c2), axis=0) + +matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2) +matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2) +matplotlib.pyplot.title("Optimal Clustering") +matplotlib.pyplot.show() + +def euclidean_distance(X, Y): + """ + Calculate the euclidean distance between X and Y. It accepts: + :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample. + :Y should be of size f. In other words, it is a single sample. + + Returns a vector of N elements with the distances between the N samples and the Y. 
+ """ + + return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1)) + +def cluster_data(solution, solution_idx): + """ + Clusters the data based on the current solution. + """ + + global num_cluster, data + feature_vector_length = data.shape[1] + cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample. + all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. It holds the distances between each cluster center and all the data samples. + clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster. + clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples with a cluster. + + for clust_idx in range(num_clusters): + # Return the current cluster center. + cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)]) + # Calculate the distance (e.g. euclidean) between the current cluster center and all samples. + cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx]) + all_clusters_dists.append(numpy.array(cluster_center_dists)) + + cluster_centers = numpy.array(cluster_centers) + all_clusters_dists = numpy.array(all_clusters_dists) + + # A 1D array that, for each sample, holds the index of the cluster with the smallest distance. + # In other words, the array holds the sample's cluster index. + cluster_indices = numpy.argmin(all_clusters_dists, axis=0) + for clust_idx in range(num_clusters): + clusters.append(numpy.where(cluster_indices == clust_idx)[0]) + # Calculate the sum of distances for the cluster. + if len(clusters[clust_idx]) == 0: + # In case the cluster is empty (i.e. has zero samples). + clusters_sum_dist.append(0) + else: + # When the cluster is not empty (i.e. has at least 1 sample). 
+ clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]])) + # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx]))) + + clusters_sum_dist = numpy.array(clusters_sum_dist) + + return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist + +def fitness_func(ga_instance, solution, solution_idx): + _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx) + + # The tiny value 0.00000001 is added to the denominator in case the average distance is 0. + fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001) + + return fitness + +num_clusters = 2 +num_genes = num_clusters * data.shape[1] + +ga_instance = pygad.GA(num_generations=100, + sol_per_pop=10, + num_parents_mating=5, + init_range_low=-6, + init_range_high=20, + keep_parents=2, + num_genes=num_genes, + fitness_func=fitness_func, + suppress_warnings=True) + +ga_instance.run() + +best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() +print("Best solution is {bs}".format(bs=best_solution)) +print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) +print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) + +cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) + +for cluster_idx in range(num_clusters): + cluster_x = data[clusters[cluster_idx], 0] + cluster_y = data[clusters[cluster_idx], 1] + matplotlib.pyplot.scatter(cluster_x, cluster_y) + matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5) +matplotlib.pyplot.title("Clustering using PyGAD") +matplotlib.pyplot.show() diff --git a/tests/example_clustering_3.py b/tests/example_clustering_3.py new file mode 100644 index 0000000..608d54b --- /dev/null +++ b/tests/example_clustering_3.py @@ -0,0 +1,134 @@ +import 
numpy +import matplotlib.pyplot +import pygad + +cluster1_num_samples = 20 +cluster1_x1_start = 0 +cluster1_x1_end = 5 +cluster1_x2_start = 2 +cluster1_x2_end = 6 +cluster1_x1 = numpy.random.random(size=(cluster1_num_samples)) +cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start +cluster1_x2 = numpy.random.random(size=(cluster1_num_samples)) +cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start + +cluster2_num_samples = 20 +cluster2_x1_start = 4 +cluster2_x1_end = 12 +cluster2_x2_start = 14 +cluster2_x2_end = 18 +cluster2_x1 = numpy.random.random(size=(cluster2_num_samples)) +cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start +cluster2_x2 = numpy.random.random(size=(cluster2_num_samples)) +cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start + +cluster3_num_samples = 20 +cluster3_x1_start = 12 +cluster3_x1_end = 18 +cluster3_x2_start = 8 +cluster3_x2_end = 11 +cluster3_x1 = numpy.random.random(size=(cluster3_num_samples)) +cluster3_x1 = cluster3_x1 * (cluster3_x1_end - cluster3_x1_start) + cluster3_x1_start +cluster3_x2 = numpy.random.random(size=(cluster3_num_samples)) +cluster3_x2 = cluster3_x2 * (cluster3_x2_end - cluster3_x2_start) + cluster3_x2_start + +c1 = numpy.array([cluster1_x1, cluster1_x2]).T +c2 = numpy.array([cluster2_x1, cluster2_x2]).T +c3 = numpy.array([cluster3_x1, cluster3_x2]).T + +data = numpy.concatenate((c1, c2, c3), axis=0) + +matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2) +matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2) +matplotlib.pyplot.scatter(cluster3_x1, cluster3_x2) +matplotlib.pyplot.title("Optimal Clustering") +matplotlib.pyplot.show() + +def euclidean_distance(X, Y): + """ + Calculate the euclidean distance between X and Y. It accepts: + :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample. + :Y should be of size f. 
In other words, it is a single sample. + + Returns a vector of N elements with the distances between the N samples and the Y. + """ + + return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1)) + +def cluster_data(solution, solution_idx): + """ + Clusters the data based on the current solution. + """ + + global num_clusters, feature_vector_length, data + cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample. + all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. It holds the distances between each cluster center and all the data samples. + clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster. + clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples with a cluster. + + for clust_idx in range(num_clusters): + # Return the current cluster center. + cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)]) + # Calculate the distance (e.g. euclidean) between the current cluster center and all samples. + cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx]) + all_clusters_dists.append(numpy.array(cluster_center_dists)) + + cluster_centers = numpy.array(cluster_centers) + all_clusters_dists = numpy.array(all_clusters_dists) + + # A 1D array that, for each sample, holds the index of the cluster with the smallest distance. + # In other words, the array holds the sample's cluster index. + cluster_indices = numpy.argmin(all_clusters_dists, axis=0) + for clust_idx in range(num_clusters): + clusters.append(numpy.where(cluster_indices == clust_idx)[0]) + # Calculate the sum of distances for the cluster. + if len(clusters[clust_idx]) == 0: + # In case the cluster is empty (i.e. has zero samples). 
+ clusters_sum_dist.append(0) + else: + # When the cluster is not empty (i.e. has at least 1 sample). + clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]])) + # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx]))) + + clusters_sum_dist = numpy.array(clusters_sum_dist) + + return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist + +def fitness_func(ga_instance, solution, solution_idx): + _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx) + + # The tiny value 0.00000001 is added to the denominator in case the average distance is 0. + fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001) + + return fitness + +num_clusters = 3 +feature_vector_length = data.shape[1] +num_genes = num_clusters * feature_vector_length + +ga_instance = pygad.GA(num_generations=100, + sol_per_pop=10, + init_range_low=0, + init_range_high=20, + num_parents_mating=5, + keep_parents=2, + num_genes=num_genes, + fitness_func=fitness_func, + suppress_warnings=True) + +ga_instance.run() + +best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() +print("Best solution is {bs}".format(bs=best_solution)) +print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) +print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) + +cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) + +for cluster_idx in range(num_clusters): + cluster_x = data[clusters[cluster_idx], 0] + cluster_y = data[clusters[cluster_idx], 1] + matplotlib.pyplot.scatter(cluster_x, cluster_y) + matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5) +matplotlib.pyplot.title("Clustering using PyGAD") +matplotlib.pyplot.show() diff --git a/tests/example_custom_operators.py 
b/tests/example_custom_operators.py new file mode 100644 index 0000000..7261a54 --- /dev/null +++ b/tests/example_custom_operators.py @@ -0,0 +1,74 @@ +import pygad +import numpy + +""" +This script gives an example of using custom user-defined functions for the 3 operators: + 1) Parent selection. + 2) Crossover. + 3) Mutation. +For more information, check the User-Defined Crossover, Mutation, and Parent Selection Operators section in the documentation: + https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#user-defined-crossover-mutation-and-parent-selection-operators +""" + +equation_inputs = [4,-2,3.5] +desired_output = 44 + +def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + + return fitness + +def parent_selection_func(fitness, num_parents, ga_instance): + # Selects the best {num_parents} parents. Works as steady-state selection. + + fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) + fitness_sorted.reverse() + + parents = numpy.empty((num_parents, ga_instance.population.shape[1])) + + for parent_num in range(num_parents): + parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() + + return parents, numpy.array(fitness_sorted[:num_parents]) + +def crossover_func(parents, offspring_size, ga_instance): + # This is single-point crossover. + offspring = [] + idx = 0 + while len(offspring) != offspring_size[0]: + parent1 = parents[idx % parents.shape[0], :].copy() + parent2 = parents[(idx + 1) % parents.shape[0], :].copy() + + random_split_point = numpy.random.choice(range(offspring_size[0])) + + parent1[random_split_point:] = parent2[random_split_point:] + + offspring.append(parent1) + + idx += 1 + + return numpy.array(offspring) + +def mutation_func(offspring, ga_instance): + # This is random mutation that mutates a single gene. 
+ for chromosome_idx in range(offspring.shape[0]): + # Make some random changes in 1 or more genes. + random_gene_idx = numpy.random.choice(range(offspring.shape[1])) + + offspring[chromosome_idx, random_gene_idx] += numpy.random.random() + + return offspring + +ga_instance = pygad.GA(num_generations=10, + sol_per_pop=5, + num_parents_mating=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + parent_selection_type=parent_selection_func, + crossover_type=crossover_func, + mutation_type=mutation_func) + +ga_instance.run() +ga_instance.plot_fitness() diff --git a/tests/example_logger.py b/tests/example_logger.py new file mode 100644 index 0000000..d38a179 --- /dev/null +++ b/tests/example_logger.py @@ -0,0 +1,45 @@ +import logging +import pygad +import numpy + +level = logging.DEBUG +name = 'logfile.txt' + +logger = logging.getLogger(name) +logger.setLevel(level) + +file_handler = logging.FileHandler(name,'a+','utf-8') +file_handler.setLevel(logging.DEBUG) +file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') +file_handler.setFormatter(file_format) +logger.addHandler(file_handler) + +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +console_format = logging.Formatter('%(message)s') +console_handler.setFormatter(console_format) +logger.addHandler(console_handler) + +equation_inputs = [4, -2, 8] +desired_output = 2671.1234 + +def fitness_func(ga_instance, solution, solution_idx): + output = numpy.sum(solution * equation_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + +def on_generation(ga_instance): + ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) + ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) + +ga_instance = pygad.GA(num_generations=10, + sol_per_pop=40, + 
num_parents_mating=2, + keep_parents=2, + num_genes=len(equation_inputs), + fitness_func=fitness_func, + on_generation=on_generation, + logger=logger) +ga_instance.run() + +logger.handlers.clear() diff --git a/tests/lifecycle.py b/tests/lifecycle.py new file mode 100644 index 0000000..8eeae5b --- /dev/null +++ b/tests/lifecycle.py @@ -0,0 +1,48 @@ +import pygad +import numpy + +function_inputs = [4,-2,3.5,5,-11,-4.7] +desired_output = 44 + +def fitness_func(ga_instanse, solution, solution_idx): + output = numpy.sum(solution*function_inputs) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + +fitness_function = fitness_func + +def on_start(ga_instance): + print("on_start()") + +def on_fitness(ga_instance, population_fitness): + print("on_fitness()") + +def on_parents(ga_instance, selected_parents): + print("on_parents()") + +def on_crossover(ga_instance, offspring_crossover): + print("on_crossover()") + +def on_mutation(ga_instance, offspring_mutation): + print("on_mutation()") + +def on_generation(ga_instance): + print("on_generation()") + +def on_stop(ga_instance, last_population_fitness): + print("on_stop") + +ga_instance = pygad.GA(num_generations=3, + num_parents_mating=5, + fitness_func=fitness_function, + sol_per_pop=10, + num_genes=len(function_inputs), + on_start=on_start, + on_fitness=on_fitness, + on_parents=on_parents, + on_crossover=on_crossover, + on_mutation=on_mutation, + on_generation=on_generation, + on_stop=on_stop) + +ga_instance.run() diff --git a/tests/test_example.py b/tests/test_example.py new file mode 100644 index 0000000..67c82f2 --- /dev/null +++ b/tests/test_example.py @@ -0,0 +1,38 @@ +import pygad +import numpy as np + +def fitness_func(ga_instance, x, x_idx): + rng_noise = np.random.default_rng(678910) + dummy_fit = rng_noise.random()*100 + x = np.sort(x) + return dummy_fit + + +gene_space = np.arange(1,41,1) + +ga_instance = pygad.GA(num_generations = 20, + num_parents_mating = 40, + sol_per_pop = 
50, + num_genes = 6, + init_range_low = gene_space[0], + init_range_high = gene_space[-1], + gene_space = gene_space, + gene_type = int, + keep_elitism = 2, + mutation_probability = 0.025, + fitness_func = fitness_func, + save_solutions = True, + allow_duplicate_genes = True, + suppress_warnings=True, + random_seed=12345) +ga_instance.run() + +trial = ga_instance.solutions +trial = np.sort(trial) + +unique_genes = [] +for i_genes in range(trial.shape[0]): + unique_genes.append(np.unique(trial[i_genes,:])) + +for unique_gene in unique_genes: + assert len(unique_gene) == len(trial[0]) From c96411865f117a2ed9bf12389dd95e9377bf4888 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 25 Apr 2023 16:27:28 -0400 Subject: [PATCH 19/32] PyTest metadata --- pyproject.toml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cc30824..f7afdf5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,9 +54,4 @@ deep_learning = ["keras", "torch"] # PyTest Configuration. Later, PyTest will support the [tool.pytest] table. 
[tool.pytest.ini_options] -minversion = "6.0" -addopts = "-ra -q" -testpaths = [ - "tests", - "integration", -] +testpaths = ["tests"] \ No newline at end of file From 44cf9fb997c270f2749f1bcaf931d3801186af31 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 25 Apr 2023 16:31:34 -0400 Subject: [PATCH 20/32] Running tests using GitHub Actions --- .github/workflows/main.yml | 27 ++++ tests/example_clustering_2.py | 122 ------------------ tests/example_clustering_3.py | 134 -------------------- tests/example_custom_operators.py | 74 ----------- tests/example_logger.py | 45 ------- tests/lifecycle.py | 48 ------- tests/test_example.py | 38 ------ tests/test_number_fitness_function_calls.py | 85 +++++++++++++ 8 files changed, 112 insertions(+), 461 deletions(-) delete mode 100644 tests/example_clustering_2.py delete mode 100644 tests/example_clustering_3.py delete mode 100644 tests/example_custom_operators.py delete mode 100644 tests/example_logger.py delete mode 100644 tests/lifecycle.py delete mode 100644 tests/test_example.py create mode 100644 tests/test_number_fitness_function_calls.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8b13789..099980d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1 +1,28 @@ +name: Testing PyGAD using PyTest +on: + push: + branches: + - main + - master + +jobs: + job_id_1: + runs-on: ubuntu-latest + name: PyTest Workflow Job + + steps: + - name: Checkout Pre-Built Action + uses: actions/checkout@v3 + + - name: Setup Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install PyTest + run: pip install pytest + + - name: Call PyTest + run: | + pytest diff --git a/tests/example_clustering_2.py b/tests/example_clustering_2.py deleted file mode 100644 index 877e318..0000000 --- a/tests/example_clustering_2.py +++ /dev/null @@ -1,122 +0,0 @@ -import numpy -import matplotlib.pyplot -import pygad - -cluster1_num_samples = 10 -cluster1_x1_start = 0 
-cluster1_x1_end = 5 -cluster1_x2_start = 2 -cluster1_x2_end = 6 -cluster1_x1 = numpy.random.random(size=(cluster1_num_samples)) -cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start -cluster1_x2 = numpy.random.random(size=(cluster1_num_samples)) -cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start - -cluster2_num_samples = 10 -cluster2_x1_start = 10 -cluster2_x1_end = 15 -cluster2_x2_start = 8 -cluster2_x2_end = 12 -cluster2_x1 = numpy.random.random(size=(cluster2_num_samples)) -cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start -cluster2_x2 = numpy.random.random(size=(cluster2_num_samples)) -cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start - -c1 = numpy.array([cluster1_x1, cluster1_x2]).T -c2 = numpy.array([cluster2_x1, cluster2_x2]).T - -data = numpy.concatenate((c1, c2), axis=0) - -matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2) -matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2) -matplotlib.pyplot.title("Optimal Clustering") -matplotlib.pyplot.show() - -def euclidean_distance(X, Y): - """ - Calculate the euclidean distance between X and Y. It accepts: - :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample. - :Y should be of size f. In other words, it is a single sample. - - Returns a vector of N elements with the distances between the N samples and the Y. - """ - - return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1)) - -def cluster_data(solution, solution_idx): - """ - Clusters the data based on the current solution. - """ - - global num_cluster, data - feature_vector_length = data.shape[1] - cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample. - all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. 
It holds the distances between each cluster center and all the data samples. - clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster. - clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples with a cluster. - - for clust_idx in range(num_clusters): - # Return the current cluster center. - cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)]) - # Calculate the distance (e.g. euclidean) between the current cluster center and all samples. - cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx]) - all_clusters_dists.append(numpy.array(cluster_center_dists)) - - cluster_centers = numpy.array(cluster_centers) - all_clusters_dists = numpy.array(all_clusters_dists) - - # A 1D array that, for each sample, holds the index of the cluster with the smallest distance. - # In other words, the array holds the sample's cluster index. - cluster_indices = numpy.argmin(all_clusters_dists, axis=0) - for clust_idx in range(num_clusters): - clusters.append(numpy.where(cluster_indices == clust_idx)[0]) - # Calculate the sum of distances for the cluster. - if len(clusters[clust_idx]) == 0: - # In case the cluster is empty (i.e. has zero samples). - clusters_sum_dist.append(0) - else: - # When the cluster is not empty (i.e. has at least 1 sample). 
- clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]])) - # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx]))) - - clusters_sum_dist = numpy.array(clusters_sum_dist) - - return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist - -def fitness_func(ga_instance, solution, solution_idx): - _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx) - - # The tiny value 0.00000001 is added to the denominator in case the average distance is 0. - fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001) - - return fitness - -num_clusters = 2 -num_genes = num_clusters * data.shape[1] - -ga_instance = pygad.GA(num_generations=100, - sol_per_pop=10, - num_parents_mating=5, - init_range_low=-6, - init_range_high=20, - keep_parents=2, - num_genes=num_genes, - fitness_func=fitness_func, - suppress_warnings=True) - -ga_instance.run() - -best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() -print("Best solution is {bs}".format(bs=best_solution)) -print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) -print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) - -cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) - -for cluster_idx in range(num_clusters): - cluster_x = data[clusters[cluster_idx], 0] - cluster_y = data[clusters[cluster_idx], 1] - matplotlib.pyplot.scatter(cluster_x, cluster_y) - matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5) -matplotlib.pyplot.title("Clustering using PyGAD") -matplotlib.pyplot.show() diff --git a/tests/example_clustering_3.py b/tests/example_clustering_3.py deleted file mode 100644 index 608d54b..0000000 --- a/tests/example_clustering_3.py +++ /dev/null @@ -1,134 +0,0 @@ 
-import numpy -import matplotlib.pyplot -import pygad - -cluster1_num_samples = 20 -cluster1_x1_start = 0 -cluster1_x1_end = 5 -cluster1_x2_start = 2 -cluster1_x2_end = 6 -cluster1_x1 = numpy.random.random(size=(cluster1_num_samples)) -cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start -cluster1_x2 = numpy.random.random(size=(cluster1_num_samples)) -cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start - -cluster2_num_samples = 20 -cluster2_x1_start = 4 -cluster2_x1_end = 12 -cluster2_x2_start = 14 -cluster2_x2_end = 18 -cluster2_x1 = numpy.random.random(size=(cluster2_num_samples)) -cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start -cluster2_x2 = numpy.random.random(size=(cluster2_num_samples)) -cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start - -cluster3_num_samples = 20 -cluster3_x1_start = 12 -cluster3_x1_end = 18 -cluster3_x2_start = 8 -cluster3_x2_end = 11 -cluster3_x1 = numpy.random.random(size=(cluster3_num_samples)) -cluster3_x1 = cluster3_x1 * (cluster3_x1_end - cluster3_x1_start) + cluster3_x1_start -cluster3_x2 = numpy.random.random(size=(cluster3_num_samples)) -cluster3_x2 = cluster3_x2 * (cluster3_x2_end - cluster3_x2_start) + cluster3_x2_start - -c1 = numpy.array([cluster1_x1, cluster1_x2]).T -c2 = numpy.array([cluster2_x1, cluster2_x2]).T -c3 = numpy.array([cluster3_x1, cluster3_x2]).T - -data = numpy.concatenate((c1, c2, c3), axis=0) - -matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2) -matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2) -matplotlib.pyplot.scatter(cluster3_x1, cluster3_x2) -matplotlib.pyplot.title("Optimal Clustering") -matplotlib.pyplot.show() - -def euclidean_distance(X, Y): - """ - Calculate the euclidean distance between X and Y. It accepts: - :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample. - :Y should be of size f. 
In other words, it is a single sample. - - Returns a vector of N elements with the distances between the N samples and the Y. - """ - - return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1)) - -def cluster_data(solution, solution_idx): - """ - Clusters the data based on the current solution. - """ - - global num_clusters, feature_vector_length, data - cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample. - all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. It holds the distances between each cluster center and all the data samples. - clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster. - clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples with a cluster. - - for clust_idx in range(num_clusters): - # Return the current cluster center. - cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)]) - # Calculate the distance (e.g. euclidean) between the current cluster center and all samples. - cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx]) - all_clusters_dists.append(numpy.array(cluster_center_dists)) - - cluster_centers = numpy.array(cluster_centers) - all_clusters_dists = numpy.array(all_clusters_dists) - - # A 1D array that, for each sample, holds the index of the cluster with the smallest distance. - # In other words, the array holds the sample's cluster index. - cluster_indices = numpy.argmin(all_clusters_dists, axis=0) - for clust_idx in range(num_clusters): - clusters.append(numpy.where(cluster_indices == clust_idx)[0]) - # Calculate the sum of distances for the cluster. - if len(clusters[clust_idx]) == 0: - # In case the cluster is empty (i.e. has zero samples). 
- clusters_sum_dist.append(0) - else: - # When the cluster is not empty (i.e. has at least 1 sample). - clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]])) - # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx]))) - - clusters_sum_dist = numpy.array(clusters_sum_dist) - - return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist - -def fitness_func(ga_instance, solution, solution_idx): - _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx) - - # The tiny value 0.00000001 is added to the denominator in case the average distance is 0. - fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001) - - return fitness - -num_clusters = 3 -feature_vector_length = data.shape[1] -num_genes = num_clusters * feature_vector_length - -ga_instance = pygad.GA(num_generations=100, - sol_per_pop=10, - init_range_low=0, - init_range_high=20, - num_parents_mating=5, - keep_parents=2, - num_genes=num_genes, - fitness_func=fitness_func, - suppress_warnings=True) - -ga_instance.run() - -best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution() -print("Best solution is {bs}".format(bs=best_solution)) -print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness)) -print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation)) - -cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx) - -for cluster_idx in range(num_clusters): - cluster_x = data[clusters[cluster_idx], 0] - cluster_y = data[clusters[cluster_idx], 1] - matplotlib.pyplot.scatter(cluster_x, cluster_y) - matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5) -matplotlib.pyplot.title("Clustering using PyGAD") -matplotlib.pyplot.show() diff --git a/tests/example_custom_operators.py 
b/tests/example_custom_operators.py deleted file mode 100644 index 7261a54..0000000 --- a/tests/example_custom_operators.py +++ /dev/null @@ -1,74 +0,0 @@ -import pygad -import numpy - -""" -This script gives an example of using custom user-defined functions for the 3 operators: - 1) Parent selection. - 2) Crossover. - 3) Mutation. -For more information, check the User-Defined Crossover, Mutation, and Parent Selection Operators section in the documentation: - https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#user-defined-crossover-mutation-and-parent-selection-operators -""" - -equation_inputs = [4,-2,3.5] -desired_output = 44 - -def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - - return fitness - -def parent_selection_func(fitness, num_parents, ga_instance): - # Selects the best {num_parents} parents. Works as steady-state selection. - - fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k]) - fitness_sorted.reverse() - - parents = numpy.empty((num_parents, ga_instance.population.shape[1])) - - for parent_num in range(num_parents): - parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy() - - return parents, numpy.array(fitness_sorted[:num_parents]) - -def crossover_func(parents, offspring_size, ga_instance): - # This is single-point crossover. - offspring = [] - idx = 0 - while len(offspring) != offspring_size[0]: - parent1 = parents[idx % parents.shape[0], :].copy() - parent2 = parents[(idx + 1) % parents.shape[0], :].copy() - - random_split_point = numpy.random.choice(range(offspring_size[0])) - - parent1[random_split_point:] = parent2[random_split_point:] - - offspring.append(parent1) - - idx += 1 - - return numpy.array(offspring) - -def mutation_func(offspring, ga_instance): - # This is random mutation that mutates a single gene. 
- for chromosome_idx in range(offspring.shape[0]): - # Make some random changes in 1 or more genes. - random_gene_idx = numpy.random.choice(range(offspring.shape[1])) - - offspring[chromosome_idx, random_gene_idx] += numpy.random.random() - - return offspring - -ga_instance = pygad.GA(num_generations=10, - sol_per_pop=5, - num_parents_mating=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - parent_selection_type=parent_selection_func, - crossover_type=crossover_func, - mutation_type=mutation_func) - -ga_instance.run() -ga_instance.plot_fitness() diff --git a/tests/example_logger.py b/tests/example_logger.py deleted file mode 100644 index d38a179..0000000 --- a/tests/example_logger.py +++ /dev/null @@ -1,45 +0,0 @@ -import logging -import pygad -import numpy - -level = logging.DEBUG -name = 'logfile.txt' - -logger = logging.getLogger(name) -logger.setLevel(level) - -file_handler = logging.FileHandler(name,'a+','utf-8') -file_handler.setLevel(logging.DEBUG) -file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') -file_handler.setFormatter(file_format) -logger.addHandler(file_handler) - -console_handler = logging.StreamHandler() -console_handler.setLevel(logging.INFO) -console_format = logging.Formatter('%(message)s') -console_handler.setFormatter(console_format) -logger.addHandler(console_handler) - -equation_inputs = [4, -2, 8] -desired_output = 2671.1234 - -def fitness_func(ga_instance, solution, solution_idx): - output = numpy.sum(solution * equation_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - -def on_generation(ga_instance): - ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed)) - ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1])) - -ga_instance = pygad.GA(num_generations=10, - sol_per_pop=40, - 
num_parents_mating=2, - keep_parents=2, - num_genes=len(equation_inputs), - fitness_func=fitness_func, - on_generation=on_generation, - logger=logger) -ga_instance.run() - -logger.handlers.clear() diff --git a/tests/lifecycle.py b/tests/lifecycle.py deleted file mode 100644 index 8eeae5b..0000000 --- a/tests/lifecycle.py +++ /dev/null @@ -1,48 +0,0 @@ -import pygad -import numpy - -function_inputs = [4,-2,3.5,5,-11,-4.7] -desired_output = 44 - -def fitness_func(ga_instanse, solution, solution_idx): - output = numpy.sum(solution*function_inputs) - fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) - return fitness - -fitness_function = fitness_func - -def on_start(ga_instance): - print("on_start()") - -def on_fitness(ga_instance, population_fitness): - print("on_fitness()") - -def on_parents(ga_instance, selected_parents): - print("on_parents()") - -def on_crossover(ga_instance, offspring_crossover): - print("on_crossover()") - -def on_mutation(ga_instance, offspring_mutation): - print("on_mutation()") - -def on_generation(ga_instance): - print("on_generation()") - -def on_stop(ga_instance, last_population_fitness): - print("on_stop") - -ga_instance = pygad.GA(num_generations=3, - num_parents_mating=5, - fitness_func=fitness_function, - sol_per_pop=10, - num_genes=len(function_inputs), - on_start=on_start, - on_fitness=on_fitness, - on_parents=on_parents, - on_crossover=on_crossover, - on_mutation=on_mutation, - on_generation=on_generation, - on_stop=on_stop) - -ga_instance.run() diff --git a/tests/test_example.py b/tests/test_example.py deleted file mode 100644 index 67c82f2..0000000 --- a/tests/test_example.py +++ /dev/null @@ -1,38 +0,0 @@ -import pygad -import numpy as np - -def fitness_func(ga_instance, x, x_idx): - rng_noise = np.random.default_rng(678910) - dummy_fit = rng_noise.random()*100 - x = np.sort(x) - return dummy_fit - - -gene_space = np.arange(1,41,1) - -ga_instance = pygad.GA(num_generations = 20, - num_parents_mating = 40, - 
sol_per_pop = 50, - num_genes = 6, - init_range_low = gene_space[0], - init_range_high = gene_space[-1], - gene_space = gene_space, - gene_type = int, - keep_elitism = 2, - mutation_probability = 0.025, - fitness_func = fitness_func, - save_solutions = True, - allow_duplicate_genes = True, - suppress_warnings=True, - random_seed=12345) -ga_instance.run() - -trial = ga_instance.solutions -trial = np.sort(trial) - -unique_genes = [] -for i_genes in range(trial.shape[0]): - unique_genes.append(np.unique(trial[i_genes,:])) - -for unique_gene in unique_genes: - assert len(unique_gene) == len(trial[0]) diff --git a/tests/test_number_fitness_function_calls.py b/tests/test_number_fitness_function_calls.py new file mode 100644 index 0000000..8863632 --- /dev/null +++ b/tests/test_number_fitness_function_calls.py @@ -0,0 +1,85 @@ +import pygad + +actual_num_fitness_calls_default_keep = 0 +actual_num_fitness_calls_no_keep = 0 +actual_num_fitness_calls_keep_elitism = 0 +actual_num_fitness_calls_keep_parents = 0 + +num_generations = 100 +sol_per_pop = 10 +num_parents_mating = 5 + +def number_calls_fitness_function(keep_elitism, + keep_parents): + + actual_num_fitness_calls = 0 + def fitness_func(ga, solution, idx): + nonlocal actual_num_fitness_calls + actual_num_fitness_calls = actual_num_fitness_calls + 1 + return 1 + + ga_optimizer = pygad.GA(num_generations=num_generations, + sol_per_pop=sol_per_pop, + num_genes=6, + num_parents_mating=num_parents_mating, + fitness_func=fitness_func, + keep_elitism=keep_elitism, + keep_parents=keep_parents, + suppress_warnings=True) + + ga_optimizer.run() + if keep_elitism == 0: + if keep_parents == 0: + # 10 (for initial population) + 100*10 (for other generations) = 1010 + expected_num_fitness_calls = sol_per_pop + num_generations * sol_per_pop + elif keep_parents == -1: + # 10 (for initial population) + 100*num_parents_mating (for other generations) + expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - 
num_parents_mating) + else: + # 10 (for initial population) + 100*keep_parents (for other generations) + expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - keep_parents) + else: + # 10 (for initial population) + 100*keep_elitism (for other generations) + expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - keep_elitism) + + print("Expected number of fitness function calls is {expected_num_fitness_calls}.".format(expected_num_fitness_calls=expected_num_fitness_calls)) + print("Actual number of fitness function calls is {actual_num_fitness_calls}.".format(actual_num_fitness_calls=actual_num_fitness_calls)) + return actual_num_fitness_calls, expected_num_fitness_calls + +def test_number_calls_fitness_function_default_keep(): + actual, expected = number_calls_fitness_function(keep_elitism=1, + keep_parents=-1) + assert actual == expected + +def test_number_calls_fitness_function_no_keep(): + actual, expected = number_calls_fitness_function(keep_elitism=0, + keep_parents=0) + assert actual == expected + +def test_number_calls_fitness_function_keep_elitism(): + actual, expected = number_calls_fitness_function(keep_elitism=3, + keep_parents=0) + assert actual == expected + +def test_number_calls_fitness_function_keep_parents(): + actual, expected = number_calls_fitness_function(keep_elitism=0, + keep_parents=4) + assert actual == expected + +def test_number_calls_fitness_function_both_keep(): + actual, expected = number_calls_fitness_function(keep_elitism=3, + keep_parents=4) + assert actual == expected + +if __name__ == "__main__": + print() + test_number_calls_fitness_function_default_keep() + print() + test_number_calls_fitness_function_no_keep() + print() + test_number_calls_fitness_function_keep_elitism() + print() + test_number_calls_fitness_function_keep_parents() + print() + test_number_calls_fitness_function_both_keep() + print() From 36b7bc3126123dd15f79180fa50cde38da0a7fa7 Mon Sep 17 00:00:00 2001 From: Ahmed Gad 
Date: Tue, 25 Apr 2023 16:32:52 -0400 Subject: [PATCH 21/32] Running tests on github-actions branch --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 099980d..6a4aae4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,8 +3,8 @@ name: Testing PyGAD using PyTest on: push: branches: - - main - - master + - github-actions + # - master jobs: job_id_1: From b3d55d27c751dd986e39d536efc25126445b70dd Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 25 Apr 2023 16:45:35 -0400 Subject: [PATCH 22/32] Build and install PyGAD in Actions Workflow --- .github/workflows/main.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6a4aae4..7e45fa9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,10 +19,19 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.10' - + + - name: Build PyGAD from the Repository + run: | + python3 -m pip install --upgrade build + python3 -m build + + - name: Install PyGAD after Building the .whl File + run: | + find ./dist/*.whl | xargs pip install + - name: Install PyTest run: pip install pytest - - name: Call PyTest + - name: Run the Tests by Calling PyTest run: | pytest From 6e19c042127adb4098d022660679175f6c7d8c94 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 27 Apr 2023 19:09:34 -0400 Subject: [PATCH 23/32] Update the version --- pyproject.toml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7afdf5..413e391 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pygad" -version = "3.0.0" +version = "3.0.1" description = "PyGAD: A Python Library for Building the Genetic Algorithm and Training Machine Learning Algoithms (Keras & 
PyTorch)." readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3" diff --git a/setup.py b/setup.py index 7ce2f32..4978a72 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pygad", - version="3.0.0", + version="3.0.1", author="Ahmed Fawzy Gad", install_requires=["numpy", "matplotlib", "cloudpickle",], author_email="ahmed.f.gad@gmail.com", From 262463ee85259cc167cd3935ea331004bf3fcbe1 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 27 Apr 2023 19:57:27 -0400 Subject: [PATCH 24/32] Test number of callbacks calls --- tests/test_lifecycle.py | 182 ++++++++++++++++++++ tests/test_number_fitness_function_calls.py | 52 +++++- 2 files changed, 226 insertions(+), 8 deletions(-) create mode 100644 tests/test_lifecycle.py diff --git a/tests/test_lifecycle.py b/tests/test_lifecycle.py new file mode 100644 index 0000000..65b38c5 --- /dev/null +++ b/tests/test_lifecycle.py @@ -0,0 +1,182 @@ +import pygad + +num_generations = 100 + +def number_lifecycle_callback_functions_calls(stop_criteria=None, + on_generation_stop=None): + actual_num_callbacks_calls = 0 + + def fitness_func(ga_instanse, solution, solution_idx): + return 1 + + def on_start(ga_instance): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_fitness(ga_instance, population_fitness): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_parents(ga_instance, selected_parents): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_crossover(ga_instance, offspring_crossover): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_mutation(ga_instance, offspring_mutation): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_generation(ga_instance): + nonlocal 
actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + if on_generation_stop: + if ga_instance.generations_completed == on_generation_stop: + return "stop" + + def on_stop(ga_instance, last_population_fitness): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=fitness_func, + sol_per_pop=10, + num_genes=5, + on_start=on_start, + on_fitness=on_fitness, + on_parents=on_parents, + on_crossover=on_crossover, + on_mutation=on_mutation, + on_generation=on_generation, + on_stop=on_stop, + stop_criteria=stop_criteria, + suppress_warnings=True) + + ga_instance.run() + + # The total number is: + # 1 [for on_start()] + + # num_generations [for on_fitness()] + + # num_generations [for on_parents()] + + # num_generations [for on_crossover()] + + # num_generations [for on_mutation()] + + # num_generations [for on_generation()] + + # 1 [for on_stop()] + # = 1 + num_generations * 5 + 1 + + # Use 'generations_completed' instead of 'num_generations' because the evolution may stop in the on_generation() callback. 
+ expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 + + print("Expected number of callbacks calls is {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) + print("Actual number of callbacks calls is {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + return actual_num_callbacks_calls, expected_num_callbacks_calls + +def number_lifecycle_callback_methods_calls(stop_criteria=None, + on_generation_stop=None): + actual_num_callbacks_calls = 0 + + class Callbacks: + def fitness_func(self, ga_instanse, solution, solution_idx): + return 1 + + def on_start(self, ga_instance): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_fitness(self, ga_instance, population_fitness): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_parents(self, ga_instance, selected_parents): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_crossover(self, ga_instance, offspring_crossover): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_mutation(self, ga_instance, offspring_mutation): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + def on_generation(self, ga_instance): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + if on_generation_stop: + if ga_instance.generations_completed == on_generation_stop: + return "stop" + + def on_stop(self, ga_instance, last_population_fitness): + nonlocal actual_num_callbacks_calls + actual_num_callbacks_calls = actual_num_callbacks_calls + 1 + + Callbacks_obj = Callbacks() + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=Callbacks_obj.fitness_func, + 
sol_per_pop=10, + num_genes=5, + on_start=Callbacks_obj.on_start, + on_fitness=Callbacks_obj.on_fitness, + on_parents=Callbacks_obj.on_parents, + on_crossover=Callbacks_obj.on_crossover, + on_mutation=Callbacks_obj.on_mutation, + on_generation=Callbacks_obj.on_generation, + on_stop=Callbacks_obj.on_stop, + stop_criteria=stop_criteria, + suppress_warnings=True) + + ga_instance.run() + + # The total number is: + # 1 [for on_start()] + + # num_generations [for on_fitness()] + + # num_generations [for on_parents()] + + # num_generations [for on_crossover()] + + # num_generations [for on_mutation()] + + # num_generations [for on_generation()] + + # 1 [for on_stop()] + # = 1 + num_generations * 5 + 1 + + # Use 'generations_completed' instead of 'num_generations' because the evolution may stop in the on_generation() callback. + expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 + + print("Expected number of callbacks calls is {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) + print("Actual number of callbacks calls is {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + return actual_num_callbacks_calls, expected_num_callbacks_calls + +def test_number_lifecycle_callback_functions_calls(): + actual, expected = number_lifecycle_callback_functions_calls() + + assert actual == expected + +def test_number_lifecycle_callback_functions_calls_stop_criteria(): + actual, expected = number_lifecycle_callback_functions_calls(on_generation_stop=30) + + assert actual == expected + +def test_number_lifecycle_callback_methods_calls(): + actual, expected = number_lifecycle_callback_methods_calls() + + assert actual == expected + +def test_number_lifecycle_callback_methods_calls_stop_criteria(): + actual, expected = number_lifecycle_callback_methods_calls(on_generation_stop=30) + + assert actual == expected + +if __name__ == "__main__": + print() + 
test_number_lifecycle_callback_functions_calls() + print() + test_number_lifecycle_callback_functions_calls_stop_criteria() + print() + test_number_lifecycle_callback_methods_calls() + print() + test_number_lifecycle_callback_methods_calls_stop_criteria() + print() diff --git a/tests/test_number_fitness_function_calls.py b/tests/test_number_fitness_function_calls.py index 8863632..c628606 100644 --- a/tests/test_number_fitness_function_calls.py +++ b/tests/test_number_fitness_function_calls.py @@ -9,8 +9,10 @@ sol_per_pop = 10 num_parents_mating = 5 -def number_calls_fitness_function(keep_elitism, - keep_parents): +def number_calls_fitness_function(keep_elitism=1, + keep_parents=-1, + mutation_type="random", + mutation_percent_genes="default"): actual_num_fitness_calls = 0 def fitness_func(ga, solution, idx): @@ -23,6 +25,8 @@ def fitness_func(ga, solution, idx): num_genes=6, num_parents_mating=num_parents_mating, fitness_func=fitness_func, + mutation_type=mutation_type, + mutation_percent_genes=mutation_percent_genes, keep_elitism=keep_elitism, keep_parents=keep_parents, suppress_warnings=True) @@ -32,43 +36,69 @@ def fitness_func(ga, solution, idx): if keep_parents == 0: # 10 (for initial population) + 100*10 (for other generations) = 1010 expected_num_fitness_calls = sol_per_pop + num_generations * sol_per_pop + if mutation_type == "adaptive": + expected_num_fitness_calls += num_generations * sol_per_pop elif keep_parents == -1: # 10 (for initial population) + 100*num_parents_mating (for other generations) expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - num_parents_mating) + if mutation_type == "adaptive": + expected_num_fitness_calls += num_generations * (sol_per_pop - num_parents_mating) else: # 10 (for initial population) + 100*keep_parents (for other generations) expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - keep_parents) + if mutation_type == "adaptive": + expected_num_fitness_calls += 
num_generations * (sol_per_pop - keep_parents) else: # 10 (for initial population) + 100*keep_elitism (for other generations) expected_num_fitness_calls = sol_per_pop + num_generations * (sol_per_pop - keep_elitism) + if mutation_type == "adaptive": + expected_num_fitness_calls += num_generations * (sol_per_pop - keep_elitism) print("Expected number of fitness function calls is {expected_num_fitness_calls}.".format(expected_num_fitness_calls=expected_num_fitness_calls)) print("Actual number of fitness function calls is {actual_num_fitness_calls}.".format(actual_num_fitness_calls=actual_num_fitness_calls)) return actual_num_fitness_calls, expected_num_fitness_calls def test_number_calls_fitness_function_default_keep(): - actual, expected = number_calls_fitness_function(keep_elitism=1, - keep_parents=-1) + actual, expected = number_calls_fitness_function() assert actual == expected def test_number_calls_fitness_function_no_keep(): actual, expected = number_calls_fitness_function(keep_elitism=0, - keep_parents=0) + keep_parents=0) assert actual == expected def test_number_calls_fitness_function_keep_elitism(): actual, expected = number_calls_fitness_function(keep_elitism=3, - keep_parents=0) + keep_parents=0) assert actual == expected def test_number_calls_fitness_function_keep_parents(): actual, expected = number_calls_fitness_function(keep_elitism=0, - keep_parents=4) + keep_parents=4) assert actual == expected def test_number_calls_fitness_function_both_keep(): actual, expected = number_calls_fitness_function(keep_elitism=3, - keep_parents=4) + keep_parents=4) + assert actual == expected + +def test_number_calls_fitness_function_no_keep_adaptive_mutation(): + actual, expected = number_calls_fitness_function(keep_elitism=0, + keep_parents=0, + mutation_type="adaptive", + mutation_percent_genes=[10, 5]) + assert actual == expected + +def test_number_calls_fitness_function_default_adaptive_mutation(): + actual, expected = 
number_calls_fitness_function(mutation_type="adaptive", + mutation_percent_genes=[10, 5]) + assert actual == expected + +def test_number_calls_fitness_function_both_keep_adaptive_mutation(): + actual, expected = number_calls_fitness_function(keep_elitism=3, + keep_parents=4, + mutation_type="adaptive", + mutation_percent_genes=[10, 5]) assert actual == expected if __name__ == "__main__": @@ -83,3 +113,9 @@ def test_number_calls_fitness_function_both_keep(): print() test_number_calls_fitness_function_both_keep() print() + test_number_calls_fitness_function_no_keep_adaptive_mutation() + print() + test_number_calls_fitness_function_default_adaptive_mutation() + print() + test_number_calls_fitness_function_both_keep_adaptive_mutation() + print() From d7747697badcb8dc3dd578de241a42247683caaa Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Thu, 27 Apr 2023 19:58:11 -0400 Subject: [PATCH 25/32] Change test file name --- tests/{test_lifecycle.py => test_lifecycle_callbacks_calls.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_lifecycle.py => test_lifecycle_callbacks_calls.py} (100%) diff --git a/tests/test_lifecycle.py b/tests/test_lifecycle_callbacks_calls.py similarity index 100% rename from tests/test_lifecycle.py rename to tests/test_lifecycle_callbacks_calls.py From 66ec89550edafb1f8f76aa46e09c694814db6df2 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Mon, 1 May 2023 19:11:03 -0400 Subject: [PATCH 26/32] Solve duplicates deeply --- examples/example_fitness_wrapper.py | 41 +++ pygad/helper/unique.py | 547 +++++++++++++++++----------- pygad/pygad.py | 67 ++-- pygad/utils/mutation.py | 1 + tests/test_allow_duplicate_genes.py | 150 ++++++++ 5 files changed, 574 insertions(+), 232 deletions(-) create mode 100644 examples/example_fitness_wrapper.py create mode 100644 tests/test_allow_duplicate_genes.py diff --git a/examples/example_fitness_wrapper.py b/examples/example_fitness_wrapper.py new file mode 100644 index 0000000..6a9acf1 --- /dev/null 
+++ b/examples/example_fitness_wrapper.py @@ -0,0 +1,41 @@ +import pygad +import numpy + +""" +All the callback functions/methods in PyGAD have limits in the number of arguments passed. +For example, the fitness function accepts only 3 arguments: + 1. The pygad.GA instance. + 2. The solution(s). + 3. The index (indices) of the passed solution(s). +If it is necessary to pass extra arguments to the fitness function, for example, then follow these steps: + 1. Create a wrapper function that accepts only the number of arguments needed by PyGAD. + 2. Define the extra arguments in the body of the wrapper function. + 3. Create an inner fitness function inside the wrapper function with whatever extra arguments needed. + 4. Call the inner fitness function from the wrapper function while passing the extra arguments. + +This is an example that passes a list ([10, 20, 30]) to the inner fitness function. The list has 3 numbers. +A number is randomly selected from the list and added to the calculated fitness. +""" + +function_inputs = [4,-2,3.5,5,-11,-4.7] +desired_output = 44 + +def fitness_func_wrapper(ga_instanse, solution, solution_idx): + def fitness_func(ga_instanse, solution, solution_idx, *args): + output = numpy.sum(solution*function_inputs) + output += numpy.random.choice(args) + fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001) + return fitness + args = [10, 20, 30] + fitness = fitness_func(ga_instanse, solution, solution_idx, *args) + return fitness + +ga_instance = pygad.GA(num_generations=3, + num_parents_mating=5, + fitness_func=fitness_func_wrapper, + sol_per_pop=10, + num_genes=len(function_inputs), + suppress_warnings=True) + +ga_instance.run() +ga_instance.plot_fitness() diff --git a/pygad/helper/unique.py b/pygad/helper/unique.py index 4d5e6ac..bd2da60 100644 --- a/pygad/helper/unique.py +++ b/pygad/helper/unique.py @@ -1,13 +1,16 @@ """ The pygad.helper.unique module has helper methods to solve duplicate genes and make sure every gene is unique. 
""" - + import numpy import warnings import random import pygad class Unique: + # DEEP-DUPLICATE-REMOVAL-NEEDED + NUM_DUPLICATE1 = 0 + NUM_DUPLICATE2 = 0 def solve_duplicate_genes_randomly(self, solution, min_val, @@ -73,7 +76,9 @@ def solve_duplicate_genes_randomly(self, pass else: temp_val = new_solution[duplicate_index] + temp_val - + + # Similar to the round_genes() method in the pygad module, + # Create a round_gene() method to round a single gene. if self.gene_type_single == True: if not gene_type[1] is None: temp_val = numpy.round(gene_type[0](temp_val), @@ -121,13 +126,12 @@ def solve_duplicate_genes_by_space(self, not_unique_indices: Indices of the genes with duplicate values. num_unsolved_duplicates: Number of unsolved duplicates. """ - new_solution = solution.copy() - + _, unique_gene_indices = numpy.unique(solution, return_index=True) not_unique_indices = set(range(len(solution))) - set(unique_gene_indices) # self.logger.info("not_unique_indices OUTSIDE", not_unique_indices) - + # First try to solve the duplicates. # For a solution like [3 2 0 0], the indices of the 2 duplicating genes are 2 and 3. # The next call to the find_unique_value() method tries to change the value of the gene with index 3 to solve the duplicate. @@ -150,13 +154,18 @@ def solve_duplicate_genes_by_space(self, num_trials=10, build_initial_pop=build_initial_pop) else: + # DEEP-DUPLICATE-REMOVAL-NEEDED # If there exist duplicate genes, then changing either of the 2 duplicating genes (with indices 2 and 3) will not solve the problem. # This problem can be solved by randomly changing one of the non-duplicating genes that may make a room for a unique value in one the 2 duplicating genes. # For example, if gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]] and the solution is [3 2 0 0], then the values of the last 2 genes duplicate. # There are no possible changes in the last 2 genes to solve the problem. But it could be solved by changing the second gene from 2 to 4. 
# As a result, any of the last 2 genes can take the value 2 and solve the duplicates. + # print("DEEP-DUPLICATE-REMOVAL-NEEDED1") + # print("DEEP-DUPLICATE-REMOVAL-NEEDED1\n", new_solution, not_unique_indices, len(not_unique_indices)) + # DEEP-DUPLICATE-REMOVAL-NEEDED + Unique.NUM_DUPLICATE1 += 1 return new_solution, not_unique_indices, len(not_unique_indices) - + return new_solution, not_unique_indices, num_unsolved_duplicates def unique_int_gene_from_range(self, @@ -167,7 +176,7 @@ def unique_int_gene_from_range(self, mutation_by_replacement, gene_type, step=None): - + """ Finds a unique integer value for the gene. @@ -188,7 +197,8 @@ def unique_int_gene_from_range(self, else: # For non-integer steps, the numpy.arange() function returns zeros id the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0]) # To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function. - all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), dtype=gene_type[0]) + all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), + dtype=gene_type[0]) else: if step is None: all_gene_values = numpy.arange(min_val, max_val, dtype=gene_type[gene_index][0]) @@ -199,7 +209,9 @@ def unique_int_gene_from_range(self, pass else: all_gene_values = all_gene_values + solution[gene_index] - + + # Similar to the round_genes() method in the pygad module, + # Create a round_gene() method to round a single gene. 
if self.gene_type_single == True: if not gene_type[1] is None: all_gene_values = numpy.round(gene_type[0](all_gene_values), @@ -230,7 +242,7 @@ def unique_int_gene_from_range(self, # selected_value = gene_type[gene_index][0](selected_value) return selected_value - + def unique_genes_by_space(self, new_solution, gene_type, @@ -252,7 +264,7 @@ def unique_genes_by_space(self, not_unique_indices: Indices of the genes with duplicate values. num_unsolved_duplicates: Number of unsolved duplicates. """ - + num_unsolved_duplicates = 0 for duplicate_index in not_unique_indices: for trial_index in range(num_trials): @@ -278,7 +290,7 @@ def unique_genes_by_space(self, # self.logger.info("not_unique_indices INSIDE", not_unique_indices) return new_solution, not_unique_indices, num_unsolved_duplicates - + def unique_gene_by_space(self, solution, gene_idx, @@ -295,14 +307,15 @@ def unique_gene_by_space(self, Returns: A unique value, if exists, for the gene. """ - + if self.gene_space_nested: - # Returning the current gene space from the 'gene_space' attribute. if type(self.gene_space[gene_idx]) in [numpy.ndarray, list]: + # Return the current gene space from the 'gene_space' attribute. curr_gene_space = self.gene_space[gene_idx].copy() else: - curr_gene_space = self.gene_space[gene_idx] - + # Return the entire gene space from the 'gene_space' attribute. + curr_gene_space = list(self.gene_space[gene_idx]).copy() + # If the gene space has only a single value, use it as the new gene value. 
if type(curr_gene_space) in pygad.GA.supported_int_float_types: value_from_space = curr_gene_space @@ -311,65 +324,61 @@ def unique_gene_by_space(self, if self.gene_type_single == True: if gene_type[0] in pygad.GA.supported_int_types: if build_initial_pop == True: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - # min_val=self.random_mutation_min_val, - # max_val=self.random_mutation_max_val, - min_val=self.init_range_low, - max_val=self.init_range_high, - mutation_by_replacement=True, - gene_type=gene_type) + # If we are building the initial population, then use the range of the initial population. + min_val = self.init_range_low + max_val = self.init_range_high else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.random_mutation_min_val, - max_val=self.random_mutation_max_val, - mutation_by_replacement=True, - gene_type=gene_type) + # If we are NOT building the initial population, then use the range of the random mutation. 
+ min_val = self.random_mutation_min_val + max_val = self.random_mutation_max_val + + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=min_val, + max_val=max_val, + mutation_by_replacement=True, + gene_type=gene_type) else: if build_initial_pop == True: - value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, - # high=self.random_mutation_max_val, - low=self.init_range_low, - high=self.init_range_high, - size=1) + low = self.init_range_low + high = self.init_range_high else: - value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, - high=self.random_mutation_max_val, - size=1) + low = self.random_mutation_min_val + high = self.random_mutation_max_val + + value_from_space = numpy.random.uniform(low=low, + high=high, + size=1) if self.mutation_by_replacement: pass else: value_from_space = solution[gene_idx] + value_from_space else: - if gene_type[gene_idx] in pygad.GA.supported_int_types: + if gene_type[gene_idx][0] in pygad.GA.supported_int_types: if build_initial_pop == True: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - # min_val=self.random_mutation_min_val, - # max_val=self.random_mutation_max_val, - min_val=self.init_range_low, - max_val=self.init_range_high, - mutation_by_replacement=True, - gene_type=gene_type) + min_val = self.init_range_low + max_val = self.init_range_high else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.random_mutation_min_val, - max_val=self.random_mutation_max_val, - mutation_by_replacement=True, - gene_type=gene_type) + min_val = self.random_mutation_min_val + max_val = self.random_mutation_max_val + + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=min_val, + max_val=max_val, + mutation_by_replacement=True, + gene_type=gene_type) else: if build_initial_pop == True: - 
value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, - # high=self.random_mutation_max_val, - low=self.init_range_low, - high=self.init_range_high, - size=1) + low = self.init_range_low + high = self.init_range_high else: - value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, - high=self.random_mutation_max_val, - size=1) + low = self.random_mutation_min_val + high = self.random_mutation_max_val + + value_from_space = numpy.random.uniform(low=low, + high=high, + size=1) if self.mutation_by_replacement: pass else: @@ -378,40 +387,18 @@ def unique_gene_by_space(self, elif type(curr_gene_space) is dict: if self.gene_type_single == True: if gene_type[0] in pygad.GA.supported_int_types: - if build_initial_pop == True: - if 'step' in curr_gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=curr_gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + if 'step' in curr_gene_space.keys(): + step = curr_gene_space['step'] else: - if 'step' in curr_gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=curr_gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + step = None + + value_from_space = self.unique_int_gene_from_range(solution=solution, + 
gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=step, + mutation_by_replacement=True, + gene_type=gene_type) else: if 'step' in curr_gene_space.keys(): value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], @@ -427,41 +414,20 @@ def unique_gene_by_space(self, else: value_from_space = solution[gene_idx] + value_from_space else: - if gene_type[gene_idx] in pygad.GA.supported_int_types: - if build_initial_pop == True: - if 'step' in curr_gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=curr_gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + # Use index 0 to return the type from the list (e.g. [int, None] or [float, 2]). 
+ if gene_type[gene_idx][0] in pygad.GA.supported_int_types: + if 'step' in curr_gene_space.keys(): + step = curr_gene_space['step'] else: - if 'step' in curr_gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=curr_gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=curr_gene_space['low'], - max_val=curr_gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + step = None + + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=curr_gene_space['low'], + max_val=curr_gene_space['high'], + step=step, + mutation_by_replacement=True, + gene_type=gene_type) else: if 'step' in curr_gene_space.keys(): value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], @@ -476,7 +442,7 @@ def unique_gene_by_space(self, pass else: value_from_space = solution[gene_idx] + value_from_space - + else: # Selecting a value randomly based on the current gene's space in the 'gene_space' attribute. # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. @@ -488,8 +454,20 @@ def unique_gene_by_space(self, values_to_select_from = list(set(curr_gene_space) - set(solution)) if len(values_to_select_from) == 0: + # DEEP-DUPLICATE-REMOVAL-NEEDED + # Reaching this block means there is no value in the gene space of this gene to solve the duplicates. + # To solve the duplicate between the 2 genes, the solution is to change the value of a third gene that makes a room to solve the duplicate. 
+ if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but the gene space does not have enough values to prevent duplicates.") + solution2 = self.solve_duplicates_deeply(solution) + if solution2 is None: + # print("DEEP-DUPLICATE-REMOVAL-NEEDED2") + # print("DEEP-DUPLICATE-REMOVAL-NEEDED2", solution, gene_idx, solution[gene_idx]) + Unique.NUM_DUPLICATE2 += 1 + else: + solution = solution2 value_from_space = solution[gene_idx] + else: value_from_space = random.choice(values_to_select_from) else: @@ -497,40 +475,18 @@ def unique_gene_by_space(self, if type(self.gene_space) is dict: if self.gene_type_single == True: if gene_type[0] in pygad.GA.supported_int_types: - if build_initial_pop == True: - if 'step' in self.gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=self.gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + if 'step' in self.gene_space.keys(): + step = self.gene_space['step'] else: - if 'step' in self.gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=self.gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + step = None + + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + 
min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=step, + mutation_by_replacement=True, + gene_type=gene_type) else: # When the gene_space is assigned a dict object, then it specifies the lower and upper limits of all genes in the space. if 'step' in self.gene_space.keys(): @@ -547,41 +503,19 @@ def unique_gene_by_space(self, else: value_from_space = solution[gene_idx] + value_from_space else: - if gene_type[gene_idx] in pygad.GA.supported_int_types: - if build_initial_pop == True: - if 'step' in self.gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=self.gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + if gene_type[gene_idx][0] in pygad.GA.supported_int_types: + if 'step' in self.gene_space.keys(): + step = self.gene_space['step'] else: - if 'step' in self.gene_space.keys(): - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=self.gene_space['step'], - mutation_by_replacement=True, - gene_type=gene_type) - else: - value_from_space = self.unique_int_gene_from_range(solution=solution, - gene_index=gene_idx, - min_val=self.gene_space['low'], - max_val=self.gene_space['high'], - step=None, - mutation_by_replacement=True, - gene_type=gene_type) + step = None + + value_from_space = self.unique_int_gene_from_range(solution=solution, + gene_index=gene_idx, + min_val=self.gene_space['low'], + max_val=self.gene_space['high'], + step=step, + mutation_by_replacement=True, + gene_type=gene_type) else: # When the gene_space is assigned a 
dict object, then it specifies the lower and upper limits of all genes in the space. if 'step' in self.gene_space.keys(): @@ -609,19 +543,21 @@ def unique_gene_by_space(self, value_from_space = solution[gene_idx] else: value_from_space = random.choice(values_to_select_from) - + if value_from_space is None: if build_initial_pop == True: - value_from_space = numpy.random.uniform(# low=self.random_mutation_min_val, - # high=self.random_mutation_max_val, - low=self.init_range_low, - high=self.init_range_high, - size=1) + low = self.init_range_low + high = self.init_range_high else: - value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, - high=self.random_mutation_max_val, - size=1) - + low = self.random_mutation_min_val + high = self.random_mutation_max_val + + value_from_space = numpy.random.uniform(low=low, + high=high, + size=1) + + # Similar to the round_genes() method in the pygad module, + # Create a round_gene() method to round a single gene. if self.gene_type_single == True: if not gene_type[1] is None: value_from_space = numpy.round(gene_type[0](value_from_space), @@ -634,5 +570,202 @@ def unique_gene_by_space(self, gene_type[gene_idx][1]) else: value_from_space = gene_type[gene_idx][0](value_from_space) - + return value_from_space + + def find_two_duplicates(self, + solution, + gene_space_unpacked): + """ + Returns the first occurrence of duplicate genes. + It returns: + The index of a gene with a duplicate value. + The value of the gene. + """ + for gene in set(solution): + gene_indices = numpy.where(numpy.array(solution) == gene)[0] + if len(gene_indices) == 1: + continue + # print("Gene value", gene, "Gene indices", gene_indices) + for gene_idx in gene_indices: + # print(" Current Gene Index", gene_idx) + number_alternate_values = len(set(gene_space_unpacked[gene_idx])) + if number_alternate_values > 1: + return gene_idx, gene + # This means there is no way to solve the duplicates between the genes. 
+ # Because the space of the duplicates genes only has a single value and there is no alternatives. + # print("Cannot solve duplicates between the genes with value {gene} at indices {gene_indices}.".format(gene_indices=gene_indices, gene=gene)) + return None, gene + + def unpack_gene_space(self, num_values_from_range=100): + """ + Unpack the gene_space for the purpose of selecting a value that solves the duplicates. + This is by replacing each range by a list of values. + For the infinite range of float values, a number of values equal to num_values_from_range is selected using the numpy.linspace() function. + It returns the unpacked gene space. + """ + + # Copy the gene_space to keep it isolated form the changes. + gene_space_unpacked = self.gene_space.copy() + for space_idx, space in enumerate(gene_space_unpacked): + if type(space) in pygad.GA.supported_int_float_types: + gene_space_unpacked[space_idx] = [space] + elif type(space) is None: + # Randomly generate the value using the mutation range. + gene_space_unpacked[space_idx] = numpy.arange(start=self.random_mutation_min_val, + stop=self.random_mutation_max_val) + elif type(space) is range: + # Convert the range to a list. + gene_space_unpacked[space_idx] = list(range) + elif type(space) is dict: + # Create a list of values using the dict range. 
+ # Use numpy.linspace() + if self.gene_type_single == True: # self.gene_type_single + if self.gene_type[0] in pygad.GA.supported_int_types: + if 'step' in space.keys(): + step = space['step'] + else: + step = 1 + + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=step) + else: + if 'step' in space.keys(): + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=space['step']) + else: + gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], + stop=space['high'], + num=num_values_from_range, + endpoint=False) + else: + if self.gene_type[space_idx][0] in pygad.GA.supported_int_types: + if 'step' in space.keys(): + step = space['step'] + else: + step = 1 + + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=step) + else: + if 'step' in space.keys(): + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=space['step']) + else: + gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], + stop=space['high'], + num=num_values_from_range, + endpoint=False) + + elif type(space) in [list, tuple, numpy.ndarray]: + # list/tuple/numpy.ndarray + # Convert all to list + gene_space_unpacked[space_idx] = list(space) + + # Check if there is an item with the value None. If so, replace it with a random value using the mutation range. + none_indices = numpy.where(numpy.array(gene_space_unpacked[space_idx]) == None)[0] + if len(none_indices) > 0: + for idx in none_indices: + random_value = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + gene_space_unpacked[space_idx][idx] = random_value + + if self.gene_type_single == True: # self.gene_type_single + # Change the data type. 
+ gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], + dtype=self.gene_type[0]) + if not self.gene_type[1] is None: + # Round the values for float (non-int) data types. + gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], + self.gene_type[1]) + else: + # Change the data type. + gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], + self.gene_type[space_idx][0]) + if not self.gene_type[space_idx][1] is None: + # Round the values for float (non-int) data types. + gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], + self.gene_type[space_idx][1]) + + return gene_space_unpacked + + def solve_duplicates_deeply(self, + solution): + """ + Sometimes it is impossible to solve the duplicate genes by simply selecting another value for either genes. + This function solve the duplicates between 2 genes by searching for a third gene that can make assist in the solution. + It returns: + The solution after solving the duplicates or the None if duplicates cannot be solved. + """ + + gene_space_unpacked = self.unpack_gene_space() + self.gene_space_unpacked = gene_space_unpacked.copy() + + duplicate_index, duplicate_value = self.find_two_duplicates(solution, + gene_space_unpacked) + # print() + # print("Duplicate_index, Duplicate_value", duplicate_index, duplicate_value) + + if duplicate_index is None: + # Impossible to solve the duplicates for the genes with value duplicate_value. + return None + + # gene_duplicate_value = solution[duplicate_index] + + + # Without copy(), the gene will be removed from the gene_space. + # Convert the space to list because tuples do not have copy() + gene_other_values = list(gene_space_unpacked[duplicate_index]).copy() + # This removes all the occurrences of this value using the __ne__ magic function. 
+ # gene_other_values = list(filter((duplicate_value).__ne__, gene_other_values)) + gene_other_values = [v for v in gene_other_values if v != duplicate_value] + # The remove() function only removes the first occurrence of the value. + # gene_other_values.remove(duplicate_value) + # if len(gene_other_values) == 0: return None + + # print("Gene_other_values", gene_other_values) + # Two conditions to solve the duplicates of the value D: + # 1. From gene_other_values, select a value V such that it is available in the gene space of another gene X. + # 2. Find an alternate value for the gene X that will not cause any duplicates. + # 2.1 If the gene X does not have alternatives, then go back to step 1 to find another gene. + # 2.2 Set the gene X to the value D. + # 2.3 Set the target gene to the value V. + # search_gene_space = gene_space_unpacked.copy() + # Set the space of the duplicate gene to empty list []. Do not remove it to not alter the indices of the gene spaces. + # search_gene_space[duplicate_index] = [] + gene_space_unpacked[duplicate_index] = [] + # print("search_gene_space", search_gene_space) + + for other_value in gene_other_values: + for space_idx, space in enumerate(gene_space_unpacked): + # print("other_value in space", other_value, space) + if other_value in space: + if other_value in solution and list(solution).index(other_value) != space_idx: + continue + else: + # print(" Current Space", space, space_idx) + # Find an alternate value for the third gene. + # Copy the space so that the original space is not changed after removing the value. + space_other_values = space.copy() + # This removes all the occurrences of this value. It is not enough to use the remove() function because it only removes the first occurrence. 
+ space_other_values = [v for v in space_other_values if v != other_value] + # print("Space_other_values", space_other_values, other_value) + for val in space_other_values: + # print("val", val) + if val in solution: + # If the value exists in another gene of the solution, then we cannot use this value as it will cause another duplicate. + # End the current iteration and go check another value. + continue + else: + solution[space_idx] = val + solution[duplicate_index] = other_value + # print("solution", solution) + return solution + # print("Cannot solve the duplicate genes with value {duplicate_value}.".format(duplicate_value=duplicate_value)) + return None + diff --git a/pygad/pygad.py b/pygad/pygad.py index b63e613..ef12c94 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -191,7 +191,12 @@ def __init__(self, self.gene_space_nested = False if type(gene_space) is type(None): pass - elif type(gene_space) in [list, tuple, range, numpy.ndarray]: + elif type(gene_space) is range: + if len(gene_space) == 0: + self.valid_parameters = False + self.logger.error("'gene_space' cannot be empty (i.e. its length must be >= 0).") + raise ValueError("'gene_space' cannot be empty (i.e. its length must be >= 0).") + elif type(gene_space) in [list, numpy.ndarray]: if len(gene_space) == 0: self.valid_parameters = False self.logger.error("'gene_space' cannot be empty (i.e. 
its length must be >= 0).") @@ -259,8 +264,8 @@ def __init__(self, else: self.valid_parameters = False - self.logger.error("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) - raise TypeError("The expected type of 'gene_space' is list, tuple, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) + self.logger.error("The expected type of 'gene_space' is list, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) + raise TypeError("The expected type of 'gene_space' is list, range, or numpy.ndarray but {gene_space_type} found.".format(gene_space_type=type(gene_space))) self.gene_space = gene_space @@ -302,6 +307,11 @@ def __init__(self, elif len(gene_type) == 2 and gene_type[0] in GA.supported_float_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): self.gene_type = gene_type self.gene_type_single = True + # A single data type of int with precision. + elif len(gene_type) == 2 and gene_type[0] in GA.supported_int_types and (type(gene_type[1]) in GA.supported_int_types or gene_type[1] is None): + self.gene_type_single = False + self.logger.error("Integers cannot have precision. Please use the integer data type directly instead of {gene_type_val}.".format(gene_type_val=gene_type)) + raise ValueError("Integers cannot have precision. 
Please use the integer data type directly instead of {gene_type_val}.".format(gene_type_val=gene_type)) elif type(gene_type) in [list, tuple, numpy.ndarray]: if num_genes is None: if initial_population is None: @@ -334,8 +344,8 @@ def __init__(self, raise TypeError("In the 'gene_type' parameter, the precision for float gene data types must be an integer but the element {gene_type_val} at index {gene_type_idx} has a precision of {gene_type_precision_val} with type {gene_type_type}.".format(gene_type_val=gene_type_val, gene_type_precision_val=gene_type_val[1], gene_type_type=gene_type_val[0], gene_type_idx=gene_type_idx)) else: self.valid_parameters = False - self.logger.error("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) - raise TypeError("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) + self.logger.error("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 of the item followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) + raise TypeError("In the 'gene_type' parameter, a precision is expected only for float gene data types but the element {gene_type} found at index {gene_type_idx}.\nNote that the data type must be at index 0 of the item followed by precision at index 1.".format(gene_type=gene_type_val, gene_type_idx=gene_type_idx)) else: self.valid_parameters = False self.logger.error("In the 'gene_type' parameter, a 
precision is specified in a list/tuple/numpy.ndarray of length 2 but value ({gene_type_val}) of type {gene_type_type} with length {gene_type_length} found at index {gene_type_idx}.".format(gene_type_val=gene_type_val, gene_type_type=type(gene_type_val), gene_type_idx=gene_type_idx, gene_type_length=len(gene_type_val))) @@ -1315,21 +1325,42 @@ def initialize_population(self, self.population = numpy.zeros(shape=self.pop_size, dtype=self.gene_type[0]) for sol_idx in range(self.sol_per_pop): for gene_idx in range(self.num_genes): - if type(self.gene_space[gene_idx]) in [list, tuple, range]: + if type(self.gene_space[gene_idx]) == type(None): + + # The following commented code replace the None value with a single number that will not change again. + # This means the gene value will be the same across all solutions. + # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + # high=high, + # size=1), dtype=self.gene_type[0])[0] + # self.population[sol_idx, gene_idx] = list(self.gene_space[gene_idx]).copy() + + # The above problem is solved by keeping the None value in the gene_space parameter. This forces PyGAD to generate this value for each solution. + self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[0])[0] + elif type(self.gene_space[gene_idx]) in [list, tuple, range]: # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. 
if type(self.gene_space[gene_idx]) is range: temp = self.gene_space[gene_idx] else: - temp = self.gene_space[gene_idx].copy() + temp = list(self.gene_space[gene_idx]).copy() + for idx, val in enumerate(self.gene_space[gene_idx]): if val is None: self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, high=high, size=1), dtype=self.gene_type[0])[0] - self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) + # Find the difference between the current gene space and the current values in the solution. + unique_gene_values = list(set(self.gene_space[gene_idx]).difference(set(self.population[sol_idx, :gene_idx]))) + if len(unique_gene_values) > 0: + self.population[sol_idx, gene_idx] = random.choice(unique_gene_values) + else: + # If there is no unique values, then we have to select a duplicate value. + self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) self.population[sol_idx, gene_idx] = self.gene_type[0](self.population[sol_idx, gene_idx]) - self.gene_space[gene_idx] = temp + self.gene_space[gene_idx] = list(temp).copy() elif type(self.gene_space[gene_idx]) is dict: if 'step' in self.gene_space[gene_idx].keys(): self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], @@ -1342,20 +1373,6 @@ def initialize_population(self, high=self.gene_space[gene_idx]['high'], size=1), dtype=self.gene_type[0])[0] - elif type(self.gene_space[gene_idx]) == type(None): - - # The following commented code replace the None value with a single number that will not change again. - # This means the gene value will be the same across all solutions. - # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - # high=high, - # size=1), dtype=self.gene_type[0])[0] - # self.population[sol_idx, gene_idx] = self.gene_space[gene_idx].copy() - - # The above problem is solved by keeping the None value in the gene_space parameter. 
This forces PyGAD to generate this value for each solution. - self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[0])[0] elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] else: @@ -1364,7 +1381,7 @@ def initialize_population(self, for gene_idx in range(self.num_genes): if type(self.gene_space[gene_idx]) in [list, tuple, range]: # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. - temp = self.gene_space[gene_idx].copy() + temp = list(self.gene_space[gene_idx]).copy() for idx, val in enumerate(self.gene_space[gene_idx]): if val is None: self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, @@ -1392,7 +1409,7 @@ def initialize_population(self, # size=1), # dtype=self.gene_type[gene_idx][0])[0] - # self.population[sol_idx, gene_idx] = self.gene_space[gene_idx].copy() + # self.population[sol_idx, gene_idx] = list(self.gene_space[gene_idx]).copy() temp = numpy.asarray(numpy.random.uniform(low=low, high=high, diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index a5eb867..9ce8722 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -8,6 +8,7 @@ import pygad class Mutation: + def random_mutation(self, offspring): """ diff --git a/tests/test_allow_duplicate_genes.py b/tests/test_allow_duplicate_genes.py new file mode 100644 index 0000000..cc3db96 --- /dev/null +++ b/tests/test_allow_duplicate_genes.py @@ -0,0 +1,150 @@ +import pygad +import random +import numpy + +num_generations = 100 + +def number_duplicate_genes(gene_space=None, + gene_type=float, + num_genes=10, + mutation_by_replacement=False, + random_mutation_min_val=-1, + random_mutation_max_val=1, + init_range_low=-4, + init_range_high=4): + + def fitness_func(ga, solution, idx): + 
return random.random() + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=fitness_func, + sol_per_pop=10, + num_genes=num_genes, + gene_space=gene_space, + gene_type=gene_type, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val, + allow_duplicate_genes=False, + mutation_by_replacement=mutation_by_replacement, + save_solutions=True, + suppress_warnings=True) + + ga_instance.run() + + num_duplicates = 0 + for solution in ga_instance.solutions: + num = len(solution) - len(set(solution)) + if num != 0: + print(solution) + num_duplicates += num + + print("Number of duplicates is {num_duplicates}.".format(num_duplicates=num_duplicates)) + return num_duplicates + +def test_number_duplicates_default(): + num_duplicates = number_duplicate_genes() + + assert num_duplicates == 0 + +def test_number_duplicates_float_gene_type(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_type=float, + num_genes=num_genes, + init_range_low=0, + init_range_high=1, + random_mutation_min_val=0, + random_mutation_max_val=1) + + assert num_duplicates == 0 + +def test_number_duplicates_int_gene_type(): + num_genes = 10 + init_range_low = 0 + init_range_high = init_range_low + num_genes + random_mutation_min_val = 0 + random_mutation_max_val = random_mutation_min_val + num_genes + num_duplicates = number_duplicate_genes(gene_type=int, + mutation_by_replacement=False, + num_genes=num_genes, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val) + + assert num_duplicates == 0 + +def test_number_duplicates_int_gene_type_replacement(): + num_genes = 10 + init_range_low = 0 + init_range_high = init_range_low + num_genes + random_mutation_min_val = 0 + random_mutation_max_val = random_mutation_min_val + 
num_genes + num_duplicates = number_duplicate_genes(gene_type=int, + mutation_by_replacement=True, + num_genes=num_genes, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val) + + assert num_duplicates == 0 + +def test_number_duplicates_single_gene_space(): + num_duplicates = number_duplicate_genes(gene_space=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + num_genes=10) + + assert num_duplicates == 0 + +def test_number_duplicates_single_range_gene_space(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_space=range(num_genes), + num_genes=num_genes) + + assert num_duplicates == 0 + +def test_number_duplicates_single_numpy_range_gene_space(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_space=numpy.arange(num_genes), + num_genes=num_genes) + + assert num_duplicates == 0 + +def test_number_duplicates_nested_gene_space(): + num_duplicates = number_duplicate_genes(gene_space=[[0, 1], + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10]], + gene_type=int, + random_mutation_min_val=11, + random_mutation_max_val=20, + mutation_by_replacement=True, + num_genes=10) + + assert num_duplicates == 0 + +if __name__ == "__main__": + # print() + # test_number_duplicates_default() + # print() + # test_number_duplicates_float_gene_type() + # print() + # test_number_duplicates_int_gene_type() + # print() + # test_number_duplicates_single_gene_space() + # print() + # test_number_duplicates_single_range_gene_space() + # print() + # test_number_duplicates_single_numpy_range_gene_space() + print() + test_number_duplicates_nested_gene_space() + print() + From 160a8365d0ba6dcda32774409fd55c4430ed0c53 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Mon, 1 May 2023 19:31:05 -0400 Subject: [PATCH 27/32] Refactor unique.py --- pygad/helper/unique.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 
deletions(-) diff --git a/pygad/helper/unique.py b/pygad/helper/unique.py index bd2da60..b3a1e61 100644 --- a/pygad/helper/unique.py +++ b/pygad/helper/unique.py @@ -8,9 +8,7 @@ import pygad class Unique: - # DEEP-DUPLICATE-REMOVAL-NEEDED - NUM_DUPLICATE1 = 0 - NUM_DUPLICATE2 = 0 + def solve_duplicate_genes_randomly(self, solution, min_val, @@ -155,15 +153,13 @@ def solve_duplicate_genes_by_space(self, build_initial_pop=build_initial_pop) else: # DEEP-DUPLICATE-REMOVAL-NEEDED + # Search by this phrase to find where deep duplicates removal should be applied. + # If there exist duplicate genes, then changing either of the 2 duplicating genes (with indices 2 and 3) will not solve the problem. # This problem can be solved by randomly changing one of the non-duplicating genes that may make a room for a unique value in one the 2 duplicating genes. # For example, if gene_space=[[3, 0, 1], [4, 1, 2], [0, 2], [3, 2, 0]] and the solution is [3 2 0 0], then the values of the last 2 genes duplicate. # There are no possible changes in the last 2 genes to solve the problem. But it could be solved by changing the second gene from 2 to 4. # As a result, any of the last 2 genes can take the value 2 and solve the duplicates. 
- # print("DEEP-DUPLICATE-REMOVAL-NEEDED1") - # print("DEEP-DUPLICATE-REMOVAL-NEEDED1\n", new_solution, not_unique_indices, len(not_unique_indices)) - # DEEP-DUPLICATE-REMOVAL-NEEDED - Unique.NUM_DUPLICATE1 += 1 return new_solution, not_unique_indices, len(not_unique_indices) return new_solution, not_unique_indices, num_unsolved_duplicates @@ -288,7 +284,7 @@ def unique_genes_by_space(self, _, unique_gene_indices = numpy.unique(new_solution, return_index=True) not_unique_indices = set(range(len(new_solution))) - set(unique_gene_indices) # self.logger.info("not_unique_indices INSIDE", not_unique_indices) - + return new_solution, not_unique_indices, num_unsolved_duplicates def unique_gene_by_space(self, @@ -455,15 +451,18 @@ def unique_gene_by_space(self, if len(values_to_select_from) == 0: # DEEP-DUPLICATE-REMOVAL-NEEDED + # Search by this phrase to find where deep duplicates removal should be applied. + # Reaching this block means there is no value in the gene space of this gene to solve the duplicates. # To solve the duplicate between the 2 genes, the solution is to change the value of a third gene that makes a room to solve the duplicate. if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but the gene space does not have enough values to prevent duplicates.") + solution2 = self.solve_duplicates_deeply(solution) if solution2 is None: - # print("DEEP-DUPLICATE-REMOVAL-NEEDED2") - # print("DEEP-DUPLICATE-REMOVAL-NEEDED2", solution, gene_idx, solution[gene_idx]) - Unique.NUM_DUPLICATE2 += 1 + # Cannot solve duplicates. At the moment, we are changing the value of a third gene to solve the duplicates between 2 genes. + # Maybe a 4th, 5th, 6th, or even more genes need to be changed to solve the duplicates. 
+ pass else: solution = solution2 value_from_space = solution[gene_idx] From 19be8b0f38b00a8d85191a9c248c92464dc1a1a4 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 2 May 2023 11:06:52 -0400 Subject: [PATCH 28/32] Refactoring the code --- pygad/helper/unique.py | 83 +++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/pygad/helper/unique.py b/pygad/helper/unique.py index b3a1e61..74ecbeb 100644 --- a/pygad/helper/unique.py +++ b/pygad/helper/unique.py @@ -59,7 +59,7 @@ def solve_duplicate_genes_randomly(self, else: temp_val = new_solution[duplicate_index] + temp_val else: - if gene_type[duplicate_index] in pygad.GA.supported_int_types: + if gene_type[duplicate_index][0] in pygad.GA.supported_int_types: temp_val = self.unique_int_gene_from_range(solution=new_solution, gene_index=duplicate_index, min_val=min_val, @@ -186,43 +186,43 @@ def unique_int_gene_from_range(self, Returns: selected_value: The new value of the gene. It may be identical to the original gene value in case there are no possible unique values for the gene. """ - + if self.gene_type_single == True: if step is None: - all_gene_values = numpy.arange(min_val, max_val, dtype=gene_type[0]) + # all_gene_values = numpy.arange(min_val, + # max_val, + # dtype=gene_type[0]) + all_gene_values = numpy.asarray(numpy.arange(min_val, max_val), + dtype=gene_type[0]) else: - # For non-integer steps, the numpy.arange() function returns zeros id the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0]) + # For non-integer steps, the numpy.arange() function returns zeros if the dtype parameter is set to an integer data type. 
So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0]) # To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function. all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), dtype=gene_type[0]) else: if step is None: - all_gene_values = numpy.arange(min_val, max_val, dtype=gene_type[gene_index][0]) + # all_gene_values = numpy.arange(min_val, + # max_val, + # dtype=gene_type[gene_index][0]) + all_gene_values = numpy.asarray(numpy.arange(min_val, max_val), + dtype=gene_type[gene_index][0]) else: - all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), dtype=gene_type[gene_index][0]) + all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), + dtype=gene_type[gene_index][0]) if mutation_by_replacement: pass else: all_gene_values = all_gene_values + solution[gene_index] - # Similar to the round_genes() method in the pygad module, - # Create a round_gene() method to round a single gene. if self.gene_type_single == True: - if not gene_type[1] is None: - all_gene_values = numpy.round(gene_type[0](all_gene_values), - gene_type[1]) - else: - if type(all_gene_values) is numpy.ndarray: - all_gene_values = numpy.asarray(all_gene_values, dtype=gene_type[0]) - else: - all_gene_values = gene_type[0](all_gene_values) + # Note that we already know that the data type is integer. + all_gene_values = numpy.asarray(all_gene_values, + dtype=gene_type[0]) else: - if not gene_type[gene_index][1] is None: - all_gene_values = numpy.round(gene_type[gene_index][0](all_gene_values), - gene_type[gene_index][1]) - else: - all_gene_values = gene_type[gene_index][0](all_gene_values) + # Note that we already know that the data type is integer. 
+ all_gene_values = numpy.asarray(all_gene_values, + gene_type[gene_index][0]) values_to_select_from = list(set(all_gene_values) - set(solution)) @@ -232,11 +232,6 @@ def unique_int_gene_from_range(self, else: selected_value = random.choice(values_to_select_from) - #if self.gene_type_single == True: - # selected_value = gene_type[0](selected_value) - #else: - # selected_value = gene_type[gene_index][0](selected_value) - return selected_value def unique_genes_by_space(self, @@ -310,7 +305,8 @@ def unique_gene_by_space(self, curr_gene_space = self.gene_space[gene_idx].copy() else: # Return the entire gene space from the 'gene_space' attribute. - curr_gene_space = list(self.gene_space[gene_idx]).copy() + # curr_gene_space = list(self.gene_space[gene_idx]).copy() + curr_gene_space = self.gene_space[gene_idx] # If the gene space has only a single value, use it as the new gene value. if type(curr_gene_space) in pygad.GA.supported_int_float_types: @@ -585,15 +581,12 @@ def find_two_duplicates(self, gene_indices = numpy.where(numpy.array(solution) == gene)[0] if len(gene_indices) == 1: continue - # print("Gene value", gene, "Gene indices", gene_indices) for gene_idx in gene_indices: - # print(" Current Gene Index", gene_idx) number_alternate_values = len(set(gene_space_unpacked[gene_idx])) if number_alternate_values > 1: return gene_idx, gene # This means there is no way to solve the duplicates between the genes. # Because the space of the duplicates genes only has a single value and there is no alternatives. 
- # print("Cannot solve duplicates between the genes with value {gene} at indices {gene_indices}.".format(gene_indices=gene_indices, gene=gene)) return None, gene def unpack_gene_space(self, num_values_from_range=100): @@ -609,13 +602,13 @@ def unpack_gene_space(self, num_values_from_range=100): for space_idx, space in enumerate(gene_space_unpacked): if type(space) in pygad.GA.supported_int_float_types: gene_space_unpacked[space_idx] = [space] - elif type(space) is None: + elif space is None: # Randomly generate the value using the mutation range. gene_space_unpacked[space_idx] = numpy.arange(start=self.random_mutation_min_val, stop=self.random_mutation_max_val) elif type(space) is range: # Convert the range to a list. - gene_space_unpacked[space_idx] = list(range) + gene_space_unpacked[space_idx] = list(space) elif type(space) is dict: # Create a list of values using the dict range. # Use numpy.linspace() @@ -703,59 +696,50 @@ def solve_duplicates_deeply(self, """ gene_space_unpacked = self.unpack_gene_space() + # Create a copy into the attribute because it will be changed later. self.gene_space_unpacked = gene_space_unpacked.copy() duplicate_index, duplicate_value = self.find_two_duplicates(solution, gene_space_unpacked) - # print() - # print("Duplicate_index, Duplicate_value", duplicate_index, duplicate_value) if duplicate_index is None: # Impossible to solve the duplicates for the genes with value duplicate_value. return None - # gene_duplicate_value = solution[duplicate_index] - # Without copy(), the gene will be removed from the gene_space. # Convert the space to list because tuples do not have copy() gene_other_values = list(gene_space_unpacked[duplicate_index]).copy() - # This removes all the occurrences of this value using the __ne__ magic function. - # gene_other_values = list(filter((duplicate_value).__ne__, gene_other_values)) + + # This removes all the occurrences of this value. 
gene_other_values = [v for v in gene_other_values if v != duplicate_value] + # The remove() function only removes the first occurrence of the value. + # Do not use it. # gene_other_values.remove(duplicate_value) - # if len(gene_other_values) == 0: return None - # print("Gene_other_values", gene_other_values) # Two conditions to solve the duplicates of the value D: # 1. From gene_other_values, select a value V such that it is available in the gene space of another gene X. # 2. Find an alternate value for the gene X that will not cause any duplicates. # 2.1 If the gene X does not have alternatives, then go back to step 1 to find another gene. # 2.2 Set the gene X to the value D. # 2.3 Set the target gene to the value V. - # search_gene_space = gene_space_unpacked.copy() # Set the space of the duplicate gene to empty list []. Do not remove it to not alter the indices of the gene spaces. - # search_gene_space[duplicate_index] = [] gene_space_unpacked[duplicate_index] = [] - # print("search_gene_space", search_gene_space) for other_value in gene_other_values: for space_idx, space in enumerate(gene_space_unpacked): - # print("other_value in space", other_value, space) if other_value in space: if other_value in solution and list(solution).index(other_value) != space_idx: continue else: - # print(" Current Space", space, space_idx) # Find an alternate value for the third gene. # Copy the space so that the original space is not changed after removing the value. space_other_values = space.copy() # This removes all the occurrences of this value. It is not enough to use the remove() function because it only removes the first occurrence. space_other_values = [v for v in space_other_values if v != other_value] - # print("Space_other_values", space_other_values, other_value) + for val in space_other_values: - # print("val", val) if val in solution: # If the value exists in another gene of the solution, then we cannot use this value as it will cause another duplicate. 
# End the current iteration and go check another value. @@ -763,8 +747,7 @@ def solve_duplicates_deeply(self, else: solution[space_idx] = val solution[duplicate_index] = other_value - # print("solution", solution) return solution - # print("Cannot solve the duplicate genes with value {duplicate_value}.".format(duplicate_value=duplicate_value)) - return None + # Reaching here means we cannot solve the duplicate genes. + return None From 1c446922c1434fd3c8adc3107ed45d615fafe5f3 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 2 May 2023 18:54:21 -0400 Subject: [PATCH 29/32] More tests --- pygad/helper/unique.py | 314 ++++++++++++++----------- pygad/pygad.py | 147 ++++++++---- pygad/utils/crossover.py | 43 ++-- tests/test_allow_duplicate_genes.py | 227 ++++++++++++++++-- tests/test_gene_space.py | 345 ++++++++++++++++++++++++++++ 5 files changed, 862 insertions(+), 214 deletions(-) create mode 100644 tests/test_gene_space.py diff --git a/pygad/helper/unique.py b/pygad/helper/unique.py index 74ecbeb..3224445 100644 --- a/pygad/helper/unique.py +++ b/pygad/helper/unique.py @@ -19,14 +19,14 @@ def solve_duplicate_genes_randomly(self, """ Solves the duplicates in a solution by randomly selecting new values for the duplicating genes. - + solution: A solution with duplicate values. min_val: Minimum value of the range to sample a number randomly. max_val: Maximum value of the range to sample a number randomly. mutation_by_replacement: Identical to the self.mutation_by_replacement attribute. gene_type: Exactly the same as the self.gene_type attribute. num_trials: Maximum number of trials to change the gene value to solve the duplicates. - + Returns: new_solution: Solution after trying to solve its duplicates. If no duplicates solved, then it is identical to the passed solution parameter. not_unique_indices: Indices of the genes with duplicate values. 
@@ -197,17 +197,22 @@ def unique_int_gene_from_range(self, else: # For non-integer steps, the numpy.arange() function returns zeros if the dtype parameter is set to an integer data type. So, this returns zeros if step is non-integer and dtype is set to an int data type: numpy.arange(min_val, max_val, step, dtype=gene_type[0]) # To solve this issue, the data type casting will not be handled inside numpy.arange(). The range is generated by numpy.arange() and then the data type is converted using the numpy.asarray() function. - all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), + all_gene_values = numpy.asarray(numpy.arange(min_val, + max_val, + step), dtype=gene_type[0]) else: if step is None: # all_gene_values = numpy.arange(min_val, # max_val, # dtype=gene_type[gene_index][0]) - all_gene_values = numpy.asarray(numpy.arange(min_val, max_val), + all_gene_values = numpy.asarray(numpy.arange(min_val, + max_val), dtype=gene_type[gene_index][0]) else: - all_gene_values = numpy.asarray(numpy.arange(min_val, max_val, step), + all_gene_values = numpy.asarray(numpy.arange(min_val, + max_val, + step), dtype=gene_type[gene_index][0]) if mutation_by_replacement: @@ -215,6 +220,7 @@ def unique_int_gene_from_range(self, else: all_gene_values = all_gene_values + solution[gene_index] + # TODO: The gene data type is converted twine. One above and one here. if self.gene_type_single == True: # Note that we already know that the data type is integer. all_gene_values = numpy.asarray(all_gene_values, @@ -227,6 +233,7 @@ def unique_int_gene_from_range(self, values_to_select_from = list(set(all_gene_values) - set(solution)) if len(values_to_select_from) == 0: + # If there is no values, then keep the current gene value. 
if not self.suppress_warnings: warnings.warn("You set 'allow_duplicate_genes=False' but there is no enough values to prevent duplicates.") selected_value = solution[gene_index] else: @@ -240,47 +247,47 @@ def unique_genes_by_space(self, not_unique_indices, num_trials=10, build_initial_pop=False): - - """ - Loops through all the duplicating genes to find unique values that from their gene spaces to solve the duplicates. - For each duplicating gene, a call to the unique_gene_by_space() function is made. - - new_solution: A solution with duplicate values. - gene_type: Exactly the same as the self.gene_type attribute. - not_unique_indices: Indices with duplicating values. - num_trials: Maximum number of trials to change the gene value to solve the duplicates. - - Returns: - new_solution: Solution after trying to solve all of its duplicates. If no duplicates solved, then it is identical to the passed solution parameter. - not_unique_indices: Indices of the genes with duplicate values. - num_unsolved_duplicates: Number of unsolved duplicates. - """ - num_unsolved_duplicates = 0 - for duplicate_index in not_unique_indices: - for trial_index in range(num_trials): - temp_val = self.unique_gene_by_space(solution=new_solution, - gene_idx=duplicate_index, - gene_type=gene_type, - build_initial_pop=build_initial_pop) - - if temp_val in new_solution and trial_index == (num_trials - 1): - # self.logger.info("temp_val, duplicate_index", temp_val, duplicate_index, new_solution) - num_unsolved_duplicates = num_unsolved_duplicates + 1 - if not self.suppress_warnings: warnings.warn("Failed to find a unique value for gene with index {gene_idx} whose value is {gene_value}. 
Consider adding more values in the gene space or use a wider range for initial population or random mutation.".format(gene_idx=duplicate_index, gene_value=new_solution[duplicate_index])) - elif temp_val in new_solution: - continue - else: - new_solution[duplicate_index] = temp_val - # self.logger.info("SOLVED", duplicate_index) - break + """ + Loops through all the duplicating genes to find unique values that from their gene spaces to solve the duplicates. + For each duplicating gene, a call to the unique_gene_by_space() function is made. + + new_solution: A solution with duplicate values. + gene_type: Exactly the same as the self.gene_type attribute. + not_unique_indices: Indices with duplicating values. + num_trials: Maximum number of trials to change the gene value to solve the duplicates. + + Returns: + new_solution: Solution after trying to solve all of its duplicates. If no duplicates solved, then it is identical to the passed solution parameter. + not_unique_indices: Indices of the genes with duplicate values. + num_unsolved_duplicates: Number of unsolved duplicates. + """ + + num_unsolved_duplicates = 0 + for duplicate_index in not_unique_indices: + for trial_index in range(num_trials): + temp_val = self.unique_gene_by_space(solution=new_solution, + gene_idx=duplicate_index, + gene_type=gene_type, + build_initial_pop=build_initial_pop) + + if temp_val in new_solution and trial_index == (num_trials - 1): + # self.logger.info("temp_val, duplicate_index", temp_val, duplicate_index, new_solution) + num_unsolved_duplicates = num_unsolved_duplicates + 1 + if not self.suppress_warnings: warnings.warn("Failed to find a unique value for gene with index {gene_idx} whose value is {gene_value}. 
Consider adding more values in the gene space or use a wider range for initial population or random mutation.".format(gene_idx=duplicate_index, gene_value=new_solution[duplicate_index])) + elif temp_val in new_solution: + continue + else: + new_solution[duplicate_index] = temp_val + # self.logger.info("SOLVED", duplicate_index) + break - # Update the list of duplicate indices after each iteration. - _, unique_gene_indices = numpy.unique(new_solution, return_index=True) - not_unique_indices = set(range(len(new_solution))) - set(unique_gene_indices) - # self.logger.info("not_unique_indices INSIDE", not_unique_indices) + # Update the list of duplicate indices after each iteration. + _, unique_gene_indices = numpy.unique(new_solution, return_index=True) + not_unique_indices = set(range(len(new_solution))) - set(unique_gene_indices) + # self.logger.info("not_unique_indices INSIDE", not_unique_indices) - return new_solution, not_unique_indices, num_unsolved_duplicates + return new_solution, not_unique_indices, num_unsolved_duplicates def unique_gene_by_space(self, solution, @@ -300,9 +307,9 @@ def unique_gene_by_space(self, """ if self.gene_space_nested: - if type(self.gene_space[gene_idx]) in [numpy.ndarray, list]: + if type(self.gene_space[gene_idx]) in [numpy.ndarray, list, tuple]: # Return the current gene space from the 'gene_space' attribute. - curr_gene_space = self.gene_space[gene_idx].copy() + curr_gene_space = list(self.gene_space[gene_idx]).copy() else: # Return the entire gene space from the 'gene_space' attribute. # curr_gene_space = list(self.gene_space[gene_idx]).copy() @@ -341,6 +348,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=low, high=high, size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. 
if self.mutation_by_replacement: pass else: @@ -371,6 +379,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=low, high=high, size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. if self.mutation_by_replacement: pass else: @@ -401,6 +410,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. if self.mutation_by_replacement: pass else: @@ -430,6 +440,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. if self.mutation_by_replacement: pass else: @@ -493,6 +504,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. if self.mutation_by_replacement: pass else: @@ -522,6 +534,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], size=1) + # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. if self.mutation_by_replacement: pass else: @@ -589,100 +602,145 @@ def find_two_duplicates(self, # Because the space of the duplicates genes only has a single value and there is no alternatives. 
return None, gene - def unpack_gene_space(self, num_values_from_range=100): + def unpack_gene_space(self, + num_values_from_inf_range=100): """ Unpack the gene_space for the purpose of selecting a value that solves the duplicates. This is by replacing each range by a list of values. - For the infinite range of float values, a number of values equal to num_values_from_range is selected using the numpy.linspace() function. + It accepts: + num_values_from_inf_range: For infinite range of float values, a fixed number of values equal to num_values_from_inf_range is selected using the numpy.linspace() function. It returns the unpacked gene space. """ # Copy the gene_space to keep it isolated form the changes. - gene_space_unpacked = self.gene_space.copy() - for space_idx, space in enumerate(gene_space_unpacked): - if type(space) in pygad.GA.supported_int_float_types: - gene_space_unpacked[space_idx] = [space] - elif space is None: - # Randomly generate the value using the mutation range. - gene_space_unpacked[space_idx] = numpy.arange(start=self.random_mutation_min_val, - stop=self.random_mutation_max_val) - elif type(space) is range: - # Convert the range to a list. - gene_space_unpacked[space_idx] = list(space) - elif type(space) is dict: - # Create a list of values using the dict range. 
- # Use numpy.linspace() - if self.gene_type_single == True: # self.gene_type_single - if self.gene_type[0] in pygad.GA.supported_int_types: - if 'step' in space.keys(): - step = space['step'] - else: - step = 1 + if self.gene_space is None: + return None - gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], - stop=space['high'], - step=step) - else: - if 'step' in space.keys(): + if self.gene_space_nested == False: + if type(self.gene_space) is range: + gene_space_unpacked = list(self.gene_space) + elif type(self.gene_space) in [numpy.ndarray, list]: + gene_space_unpacked = self.gene_space.copy() + elif type(self.gene_space) is dict: + if 'step' in self.gene_space.keys(): + gene_space_unpacked = numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']) + else: + gene_space_unpacked = numpy.linspace(start=self.gene_space['low'], + stop=self.gene_space['high'], + num=num_values_from_inf_range, + endpoint=False) + + if self.gene_type_single == True: + # Change the data type. + gene_space_unpacked = numpy.array(gene_space_unpacked, + dtype=self.gene_type[0]) + if not self.gene_type[1] is None: + # Round the values for float (non-int) data types. + gene_space_unpacked = numpy.round(gene_space_unpacked, + self.gene_type[1]) + else: + temp_gene_space_unpacked = gene_space_unpacked.copy() + gene_space_unpacked = [] + # Get the number of genes from the length of gene_type. + # The num_genes attribute is not set yet when this method (unpack_gene_space) is called for the first time. + for gene_idx in range(len(self.gene_type)): + # Change the data type. + gene_space_item_unpacked = numpy.array(temp_gene_space_unpacked, + self.gene_type[gene_idx][0]) + if not self.gene_type[gene_idx][1] is None: + # Round the values for float (non-int) data types. 
+ gene_space_item_unpacked = numpy.round(temp_gene_space_unpacked, + self.gene_type[gene_idx][1]) + gene_space_unpacked.append(gene_space_item_unpacked) + + elif self.gene_space_nested == True: + gene_space_unpacked = self.gene_space.copy() + for space_idx, space in enumerate(gene_space_unpacked): + if type(space) in pygad.GA.supported_int_float_types: + gene_space_unpacked[space_idx] = [space] + elif space is None: + # Randomly generate the value using the mutation range. + gene_space_unpacked[space_idx] = numpy.arange(start=self.random_mutation_min_val, + stop=self.random_mutation_max_val) + elif type(space) is range: + # Convert the range to a list. + gene_space_unpacked[space_idx] = list(space) + elif type(space) is dict: + # Create a list of values using the dict range. + # Use numpy.linspace() + if self.gene_type_single == True: # self.gene_type_single + if self.gene_type[0] in pygad.GA.supported_int_types: + if 'step' in space.keys(): + step = space['step'] + else: + step = 1 + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], stop=space['high'], - step=space['step']) - else: - gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], - stop=space['high'], - num=num_values_from_range, - endpoint=False) - else: - if self.gene_type[space_idx][0] in pygad.GA.supported_int_types: - if 'step' in space.keys(): - step = space['step'] + step=step) else: - step = 1 - - gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], - stop=space['high'], - step=step) + if 'step' in space.keys(): + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=space['step']) + else: + gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], + stop=space['high'], + num=num_values_from_inf_range, + endpoint=False) else: - if 'step' in space.keys(): + if self.gene_type[space_idx][0] in pygad.GA.supported_int_types: + if 'step' in space.keys(): + step = space['step'] + else: + step = 1 + 
gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], stop=space['high'], - step=space['step']) + step=step) else: - gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], - stop=space['high'], - num=num_values_from_range, - endpoint=False) - - elif type(space) in [list, tuple, numpy.ndarray]: - # list/tuple/numpy.ndarray - # Convert all to list - gene_space_unpacked[space_idx] = list(space) - - # Check if there is an item with the value None. If so, replace it with a random value using the mutation range. - none_indices = numpy.where(numpy.array(gene_space_unpacked[space_idx]) == None)[0] - if len(none_indices) > 0: - for idx in none_indices: - random_value = numpy.random.uniform(low=self.random_mutation_min_val, - high=self.random_mutation_max_val, - size=1) - gene_space_unpacked[space_idx][idx] = random_value - - if self.gene_type_single == True: # self.gene_type_single - # Change the data type. - gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], - dtype=self.gene_type[0]) - if not self.gene_type[1] is None: - # Round the values for float (non-int) data types. - gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], - self.gene_type[1]) - else: - # Change the data type. - gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], - self.gene_type[space_idx][0]) - if not self.gene_type[space_idx][1] is None: - # Round the values for float (non-int) data types. 
- gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], - self.gene_type[space_idx][1]) + if 'step' in space.keys(): + gene_space_unpacked[space_idx] = numpy.arange(start=space['low'], + stop=space['high'], + step=space['step']) + else: + gene_space_unpacked[space_idx] = numpy.linspace(start=space['low'], + stop=space['high'], + num=num_values_from_inf_range, + endpoint=False) + + elif type(space) in [numpy.ndarray, list, tuple]: + # list/tuple/numpy.ndarray + # Convert all to list + gene_space_unpacked[space_idx] = list(space) + + # Check if there is an item with the value None. If so, replace it with a random value using the mutation range. + none_indices = numpy.where(numpy.array(gene_space_unpacked[space_idx]) == None)[0] + if len(none_indices) > 0: + for idx in none_indices: + random_value = numpy.random.uniform(low=self.random_mutation_min_val, + high=self.random_mutation_max_val, + size=1) + gene_space_unpacked[space_idx][idx] = random_value + + if self.gene_type_single == True: # self.gene_type_single + # Change the data type. + gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], + dtype=self.gene_type[0]) + if not self.gene_type[1] is None: + # Round the values for float (non-int) data types. + gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], + self.gene_type[1]) + else: + # Change the data type. + gene_space_unpacked[space_idx] = numpy.array(gene_space_unpacked[space_idx], + self.gene_type[space_idx][0]) + if not self.gene_type[space_idx][1] is None: + # Round the values for float (non-int) data types. + gene_space_unpacked[space_idx] = numpy.round(gene_space_unpacked[space_idx], + self.gene_type[space_idx][1]) return gene_space_unpacked @@ -695,9 +753,9 @@ def solve_duplicates_deeply(self, The solution after solving the duplicates or the None if duplicates cannot be solved. 
""" - gene_space_unpacked = self.unpack_gene_space() - # Create a copy into the attribute because it will be changed later. - self.gene_space_unpacked = gene_space_unpacked.copy() + # gene_space_unpacked = self.unpack_gene_space() + # Create a copy of the gene_space_unpacked attribute because it will be changed later. + gene_space_unpacked = self.gene_space_unpacked.copy() duplicate_index, duplicate_value = self.find_two_duplicates(solution, gene_space_unpacked) @@ -726,7 +784,7 @@ def solve_duplicates_deeply(self, # 2.3 Set the target gene to the value V. # Set the space of the duplicate gene to empty list []. Do not remove it to not alter the indices of the gene spaces. gene_space_unpacked[duplicate_index] = [] - + for other_value in gene_other_values: for space_idx, space in enumerate(gene_space_unpacked): if other_value in space: diff --git a/pygad/pygad.py b/pygad/pygad.py index ef12c94..b7382cc 100644 --- a/pygad/pygad.py +++ b/pygad/pygad.py @@ -187,6 +187,14 @@ def __init__(self, self.mutation_by_replacement = mutation_by_replacement + # Validate allow_duplicate_genes + if not (type(allow_duplicate_genes) is bool): + self.valid_parameters = False + self.logger.error("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) + raise TypeError("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) + + self.allow_duplicate_genes = allow_duplicate_genes + # Validate gene_space self.gene_space_nested = False if type(gene_space) is type(None): @@ -203,7 +211,7 @@ def __init__(self, raise ValueError("'gene_space' cannot be empty (i.e. 
its length must be >= 0).") else: for index, el in enumerate(gene_space): - if type(el) in [list, tuple, range, numpy.ndarray]: + if type(el) in [numpy.ndarray, list, tuple, range]: if len(el) == 0: self.valid_parameters = False self.logger.error("The element indexed {index} of 'gene_space' with type {el_type} cannot be empty (i.e. its length must be >= 0).".format(index=index, el_type=type(el))) @@ -361,6 +369,9 @@ def __init__(self, self.logger.error("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) raise ValueError("The value passed to the 'gene_type' parameter must be either a single integer, floating-point, list, tuple, or numpy.ndarray but ({gene_type_val}) of type {gene_type_type} found.".format(gene_type_val=gene_type, gene_type_type=type(gene_type))) + # Call the unpack_gene_space() method in the pygad.helper.unique.Unique class. + self.gene_space_unpacked = self.unpack_gene_space() + # Build the initial population if initial_population is None: if (sol_per_pop is None) or (num_genes is None): @@ -421,21 +432,43 @@ def __init__(self, # Forcing the initial_population array to have the data type assigned to the gene_type parameter. if self.gene_type_single == True: if self.gene_type[1] == None: - self.initial_population = numpy.array(initial_population, dtype=self.gene_type[0]) + self.initial_population = numpy.array(initial_population, + dtype=self.gene_type[0]) else: - self.initial_population = numpy.round(numpy.array(initial_population, dtype=self.gene_type[0]), self.gene_type[1]) + # This block is reached only for non-integer data types (i.e. float). 
+ self.initial_population = numpy.round(numpy.array(initial_population, + dtype=self.gene_type[0]), + self.gene_type[1]) else: initial_population = numpy.array(initial_population) - self.initial_population = numpy.zeros(shape=(initial_population.shape[0], initial_population.shape[1]), dtype=object) + self.initial_population = numpy.zeros(shape=(initial_population.shape[0], + initial_population.shape[1]), + dtype=object) for gene_idx in range(initial_population.shape[1]): if self.gene_type[gene_idx][1] is None: self.initial_population[:, gene_idx] = numpy.asarray(initial_population[:, gene_idx], dtype=self.gene_type[gene_idx][0]) else: + # This block is reached only for non-integer data types (i.e. float). self.initial_population[:, gene_idx] = numpy.round(numpy.asarray(initial_population[:, gene_idx], dtype=self.gene_type[gene_idx][0]), self.gene_type[gene_idx][1]) + # Check if duplicates are allowed. If not, then solve any exisiting duplicates in the passed initial population. + if self.allow_duplicate_genes == False: + for initial_solution_idx, initial_solution in enumerate(self.initial_population): + if self.gene_space is None: + self.initial_population[initial_solution_idx], _, _ = self.solve_duplicate_genes_randomly(solution=initial_solution, + min_val=self.init_range_low, + max_val=self.init_range_high, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) + else: + self.initial_population[initial_solution_idx], _, _ = self.solve_duplicate_genes_by_space(solution=initial_solution, + gene_type=self.gene_type, + num_trials=10) + self.population = self.initial_population.copy() # A NumPy array holding the initial population. self.num_genes = self.initial_population.shape[1] # Number of genes in the solution. self.sol_per_pop = self.initial_population.shape[0] # Number of solutions in the population. 
@@ -1096,14 +1129,6 @@ def __init__(self, self.logger.error("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} found.".format(save_solutions_type=type(save_solutions))) raise TypeError("The value passed to the 'save_solutions' parameter must be of type bool but {save_solutions_type} found.".format(save_solutions_type=type(save_solutions))) - # Validate allow_duplicate_genes - if not (type(allow_duplicate_genes) is bool): - self.valid_parameters = False - self.logger.error("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) - raise TypeError("The expected type of the 'allow_duplicate_genes' parameter is bool but {allow_duplicate_genes_type} found.".format(allow_duplicate_genes_type=type(allow_duplicate_genes))) - - self.allow_duplicate_genes = allow_duplicate_genes - self.stop_criteria = [] self.supported_stop_words = ["reach", "saturate"] if stop_criteria is None: @@ -1259,7 +1284,8 @@ def round_genes(self, solutions): for gene_idx in range(self.num_genes): if self.gene_type_single: if not self.gene_type[1] is None: - solutions[:, gene_idx] = numpy.round(solutions[:, gene_idx], self.gene_type[1]) + solutions[:, gene_idx] = numpy.round(solutions[:, gene_idx], + self.gene_type[1]) else: if not self.gene_type[gene_idx][1] is None: solutions[:, gene_idx] = numpy.round(numpy.asarray(solutions[:, gene_idx], @@ -1322,10 +1348,14 @@ def initialize_population(self, elif self.gene_space_nested: if self.gene_type_single == True: - self.population = numpy.zeros(shape=self.pop_size, dtype=self.gene_type[0]) + # Reaching this block means: + # 1) gene_space is nested (gene_space_nested is True). + # 2) gene_type is not nested (gene_type_single is True). 
+ self.population = numpy.zeros(shape=self.pop_size, + dtype=self.gene_type[0]) for sol_idx in range(self.sol_per_pop): for gene_idx in range(self.num_genes): - if type(self.gene_space[gene_idx]) == type(None): + if self.gene_space[gene_idx] is None: # The following commented code replace the None value with a single number that will not change again. # This means the gene value will be the same across all solutions. @@ -1339,12 +1369,16 @@ def initialize_population(self, high=high, size=1), dtype=self.gene_type[0])[0] - elif type(self.gene_space[gene_idx]) in [list, tuple, range]: + elif type(self.gene_space[gene_idx]) in [numpy.ndarray, list, tuple, range]: # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. if type(self.gene_space[gene_idx]) is range: - temp = self.gene_space[gene_idx] + temp_gene_space = self.gene_space[gene_idx] else: - temp = list(self.gene_space[gene_idx]).copy() + # Convert to list because tuple and range do not have copy(). + # We copy the gene_space to a temp variable to keep its original value. + # In the next for loop, the gene_space is changed. + # Later, the gene_space is restored to its original value using the temp variable. + temp_gene_space = list(self.gene_space[gene_idx]).copy() for idx, val in enumerate(self.gene_space[gene_idx]): if val is None: @@ -1359,8 +1393,10 @@ def initialize_population(self, else: # If there is no unique values, then we have to select a duplicate value. self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) + self.population[sol_idx, gene_idx] = self.gene_type[0](self.population[sol_idx, gene_idx]) - self.gene_space[gene_idx] = list(temp).copy() + # Restore the gene_space from the temp_gene_space variable. 
+ self.gene_space[gene_idx] = list(temp_gene_space).copy() elif type(self.gene_space[gene_idx]) is dict: if 'step' in self.gene_space[gene_idx].keys(): self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], @@ -1375,22 +1411,36 @@ def initialize_population(self, dtype=self.gene_type[0])[0] elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] + else: + # There is no more options. + pass else: - self.population = numpy.zeros(shape=self.pop_size, dtype=object) + # Reaching this block means: + # 1) gene_space is nested (gene_space_nested is True). + # 2) gene_type is nested (gene_type_single is False). + self.population = numpy.zeros(shape=self.pop_size, + dtype=object) for sol_idx in range(self.sol_per_pop): for gene_idx in range(self.num_genes): - if type(self.gene_space[gene_idx]) in [list, tuple, range]: + if type(self.gene_space[gene_idx]) in [numpy.ndarray, list, tuple, range]: + # Convert to list because tuple and range do not have copy(). + # We copy the gene_space to a temp variable to keep its original value. + # In the next for loop, the gene_space is changed. + # Later, the gene_space is restored to its original value using the temp variable. + temp_gene_space = list(self.gene_space[gene_idx]).copy() + # Check if the gene space has None values. If any, then replace it with randomly generated values according to the 3 attributes init_range_low, init_range_high, and gene_type. 
- temp = list(self.gene_space[gene_idx]).copy() for idx, val in enumerate(self.gene_space[gene_idx]): if val is None: self.gene_space[gene_idx][idx] = numpy.asarray(numpy.random.uniform(low=low, high=high, size=1), dtype=self.gene_type[gene_idx][0])[0] + self.population[sol_idx, gene_idx] = random.choice(self.gene_space[gene_idx]) self.population[sol_idx, gene_idx] = self.gene_type[gene_idx][0](self.population[sol_idx, gene_idx]) - self.gene_space[gene_idx] = temp.copy() + # Restore the gene_space from the temp_gene_space variable. + self.gene_space[gene_idx] = temp_gene_space.copy() elif type(self.gene_space[gene_idx]) is dict: if 'step' in self.gene_space[gene_idx].keys(): self.population[sol_idx, gene_idx] = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], @@ -1404,22 +1454,24 @@ def initialize_population(self, size=1), dtype=self.gene_type[gene_idx][0])[0] elif type(self.gene_space[gene_idx]) == type(None): - # self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - # high=high, - # size=1), - # dtype=self.gene_type[gene_idx][0])[0] + temp_gene_value = numpy.asarray(numpy.random.uniform(low=low, + high=high, + size=1), + dtype=self.gene_type[gene_idx][0])[0] - # self.population[sol_idx, gene_idx] = list(self.gene_space[gene_idx]).copy() - - temp = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[gene_idx][0])[0] - self.population[sol_idx, gene_idx] = temp + self.population[sol_idx, gene_idx] = temp_gene_value.copy() elif type(self.gene_space[gene_idx]) in GA.supported_int_float_types: self.population[sol_idx, gene_idx] = self.gene_space[gene_idx] + else: + # There is no more options. + pass else: + # Handle the non-nested gene_space. It can be assigned a numeric value, list, numpy.ndarray, or a dict. if self.gene_type_single == True: + # Reaching this block means: + # 1) gene_space is not nested (gene_space_nested is False). 
+ # 2) gene_type is not nested (gene_type_single is True). + # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. for idx, curr_gene_space in enumerate(self.gene_space): if curr_gene_space is None: @@ -1446,25 +1498,23 @@ def initialize_population(self, size=self.pop_size), dtype=self.gene_type[0]) # A NumPy array holding the initial population. else: - # Replace all the None values with random values using the init_range_low, init_range_high, and gene_type attributes. - for gene_idx, curr_gene_space in enumerate(self.gene_space): - if curr_gene_space is None: - self.gene_space[gene_idx] = numpy.asarray(numpy.random.uniform(low=low, - high=high, - size=1), - dtype=self.gene_type[gene_idx][0])[0] - + # Reaching this block means: + # 1) gene_space is not nested (gene_space_nested is False). + # 2) gene_type is nested (gene_type_single is False). + # Creating the initial population by randomly selecting the genes' values from the values inside the 'gene_space' parameter. if type(self.gene_space) is dict: # Create an empty population of dtype=object to support storing mixed data types within the same array. - self.population = numpy.zeros(shape=self.pop_size, dtype=object) + self.population = numpy.zeros(shape=self.pop_size, + dtype=object) # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. for gene_idx in range(self.num_genes): + # Generate the values of the current gene across all solutions. # A vector of all values of this single gene across all solutions in the population. 
- if 'step' in self.gene_space[gene_idx].keys(): - gene_values = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space[gene_idx]['low'], - stop=self.gene_space[gene_idx]['high'], - step=self.gene_space[gene_idx]['step']), + if 'step' in self.gene_space.keys(): + gene_values = numpy.asarray(numpy.random.choice(numpy.arange(start=self.gene_space['low'], + stop=self.gene_space['high'], + step=self.gene_space['step']), size=self.pop_size[0]), dtype=self.gene_type[gene_idx][0]) else: @@ -1476,6 +1526,9 @@ def initialize_population(self, self.population[:, gene_idx] = gene_values else: + # Reaching this block means that the gene_space is not None or dict. + # It can be either range, numpy.ndarray, or list. + # Create an empty population of dtype=object to support storing mixed data types within the same array. self.population = numpy.zeros(shape=self.pop_size, dtype=object) # Loop through the genes, randomly generate the values of a single gene across the entire population, and add the values of each gene to the population. 
diff --git a/pygad/utils/crossover.py b/pygad/utils/crossover.py index 8eff279..6cc9a27 100644 --- a/pygad/utils/crossover.py +++ b/pygad/utils/crossover.py @@ -64,6 +64,7 @@ def single_point_crossover(self, parents, offspring_size): gene_type=self.gene_type, num_trials=10) + return offspring def two_points_crossover(self, parents, offspring_size): @@ -120,15 +121,15 @@ def two_points_crossover(self, parents, offspring_size): if self.allow_duplicate_genes == False: if self.gene_space is None: offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], - min_val=self.random_mutation_min_val, - max_val=self.random_mutation_max_val, - mutation_by_replacement=self.mutation_by_replacement, - gene_type=self.gene_type, - num_trials=10) + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) else: offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], - gene_type=self.gene_type, - num_trials=10) + gene_type=self.gene_type, + num_trials=10) return offspring def uniform_crossover(self, parents, offspring_size): @@ -180,15 +181,15 @@ def uniform_crossover(self, parents, offspring_size): if self.allow_duplicate_genes == False: if self.gene_space is None: offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], - min_val=self.random_mutation_min_val, - max_val=self.random_mutation_max_val, - mutation_by_replacement=self.mutation_by_replacement, - gene_type=self.gene_type, - num_trials=10) + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) else: offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], - gene_type=self.gene_type, - num_trials=10) + gene_type=self.gene_type, + num_trials=10) return offspring @@ -236,13 +237,13 @@ def 
scattered_crossover(self, parents, offspring_size): if self.allow_duplicate_genes == False: if self.gene_space is None: offspring[k], _, _ = self.solve_duplicate_genes_randomly(solution=offspring[k], - min_val=self.random_mutation_min_val, - max_val=self.random_mutation_max_val, - mutation_by_replacement=self.mutation_by_replacement, - gene_type=self.gene_type, - num_trials=10) + min_val=self.random_mutation_min_val, + max_val=self.random_mutation_max_val, + mutation_by_replacement=self.mutation_by_replacement, + gene_type=self.gene_type, + num_trials=10) else: offspring[k], _, _ = self.solve_duplicate_genes_by_space(solution=offspring[k], - gene_type=self.gene_type, - num_trials=10) + gene_type=self.gene_type, + num_trials=10) return offspring diff --git a/tests/test_allow_duplicate_genes.py b/tests/test_allow_duplicate_genes.py index cc3db96..af7b652 100644 --- a/tests/test_allow_duplicate_genes.py +++ b/tests/test_allow_duplicate_genes.py @@ -2,7 +2,18 @@ import random import numpy -num_generations = 100 +num_generations = 1 + +initial_population = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] def number_duplicate_genes(gene_space=None, gene_type=float, @@ -11,7 +22,9 @@ def number_duplicate_genes(gene_space=None, random_mutation_min_val=-1, random_mutation_max_val=1, init_range_low=-4, - init_range_high=4): + init_range_high=4, + random_seed=123, + initial_population=None): def fitness_func(ga, solution, idx): return random.random() @@ -23,12 +36,14 @@ def fitness_func(ga, solution, idx): num_genes=num_genes, gene_space=gene_space, gene_type=gene_type, + initial_population=initial_population, init_range_low=init_range_low, init_range_high=init_range_high, 
random_mutation_min_val=random_mutation_min_val, random_mutation_max_val=random_mutation_max_val, allow_duplicate_genes=False, mutation_by_replacement=mutation_by_replacement, + random_seed=random_seed, save_solutions=True, suppress_warnings=True) @@ -49,6 +64,11 @@ def test_number_duplicates_default(): assert num_duplicates == 0 +def test_number_duplicates_default_initial_population(): + num_duplicates = number_duplicate_genes(initial_population=initial_population) + + assert num_duplicates == 0 + def test_number_duplicates_float_gene_type(): num_genes = 10 num_duplicates = number_duplicate_genes(gene_type=float, @@ -60,6 +80,18 @@ def test_number_duplicates_float_gene_type(): assert num_duplicates == 0 +def test_number_duplicates_float_gene_type_initial_population(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_type=float, + num_genes=num_genes, + init_range_low=0, + init_range_high=1, + initial_population=initial_population, + random_mutation_min_val=0, + random_mutation_max_val=1) + + assert num_duplicates == 0 + def test_number_duplicates_int_gene_type(): num_genes = 10 init_range_low = 0 @@ -76,6 +108,23 @@ def test_number_duplicates_int_gene_type(): assert num_duplicates == 0 +def test_number_duplicates_int_gene_type_initial_population(): + num_genes = 10 + init_range_low = 0 + init_range_high = init_range_low + num_genes + random_mutation_min_val = 0 + random_mutation_max_val = random_mutation_min_val + num_genes + num_duplicates = number_duplicate_genes(gene_type=int, + mutation_by_replacement=False, + num_genes=num_genes, + init_range_low=init_range_low, + init_range_high=init_range_high, + initial_population=initial_population, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val) + + assert num_duplicates == 0 + def test_number_duplicates_int_gene_type_replacement(): num_genes = 10 init_range_low = 0 @@ -92,12 +141,36 @@ def test_number_duplicates_int_gene_type_replacement(): assert 
num_duplicates == 0 +def test_number_duplicates_int_gene_type_replacement_initial_population(): + num_genes = 10 + init_range_low = 0 + init_range_high = init_range_low + num_genes + random_mutation_min_val = 0 + random_mutation_max_val = random_mutation_min_val + num_genes + num_duplicates = number_duplicate_genes(gene_type=int, + mutation_by_replacement=True, + num_genes=num_genes, + init_range_low=init_range_low, + init_range_high=init_range_high, + initial_population=initial_population, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val) + + assert num_duplicates == 0 + def test_number_duplicates_single_gene_space(): num_duplicates = number_duplicate_genes(gene_space=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], num_genes=10) assert num_duplicates == 0 +def test_number_duplicates_single_gene_space_initial_population(): + num_duplicates = number_duplicate_genes(gene_space=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + num_genes=10, + initial_population=initial_population) + + assert num_duplicates == 0 + def test_number_duplicates_single_range_gene_space(): num_genes = 10 num_duplicates = number_duplicate_genes(gene_space=range(num_genes), @@ -105,6 +178,14 @@ def test_number_duplicates_single_range_gene_space(): assert num_duplicates == 0 +def test_number_duplicates_single_range_gene_space_initial_population(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_space=range(num_genes), + num_genes=num_genes, + initial_population=initial_population) + + assert num_duplicates == 0 + def test_number_duplicates_single_numpy_range_gene_space(): num_genes = 10 num_duplicates = number_duplicate_genes(gene_space=numpy.arange(num_genes), @@ -112,6 +193,14 @@ def test_number_duplicates_single_numpy_range_gene_space(): assert num_duplicates == 0 +def test_number_duplicates_single_numpy_range_gene_space_initial_population(): + num_genes = 10 + num_duplicates = number_duplicate_genes(gene_space=numpy.arange(num_genes), + 
num_genes=num_genes, + initial_population=initial_population) + + assert num_duplicates == 0 + def test_number_duplicates_nested_gene_space(): num_duplicates = number_duplicate_genes(gene_space=[[0, 1], [1, 2], @@ -124,27 +213,129 @@ def test_number_duplicates_nested_gene_space(): [8, 9], [9, 10]], gene_type=int, - random_mutation_min_val=11, - random_mutation_max_val=20, - mutation_by_replacement=True, num_genes=10) assert num_duplicates == 0 +def test_number_duplicates_nested_gene_space_initial_population(): + num_duplicates = number_duplicate_genes(gene_space=[[0, 1], + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10]], + gene_type=int, + num_genes=10, + initial_population=initial_population) + + assert num_duplicates == 0 + + +# def test_number_duplicates_nested_gene_space_nested_gene_type(): + """ + This example causes duplicate genes that can only be solved by changing the values of a chain of genes. + Let's explain it using this solution: [0, 2, 3, 4, 5, 6, 6, 7, 8, 9] + It has 2 genes with the value 6 at indices 5 and 6. + According to the gene space, none of these genes can has a different value that solves the duplicates. + -If the value of the gene at index 5 is changed from 6 to 5, then it causes another duplicate with the gene at index 4. + -If the value of the gene at index 6 is changed from 6 to 7, then it causes another duplicate with the gene at index 7. + The solution is to change a chain of genes that make a room to solve the duplicates between the 2 genes. + 1) Change the second gene from 2 to 1. + 2) Change the third gene from 3 to 2. + 3) Change the fourth gene from 4 to 3. + 4) Change the fifth gene from 5 to 4. + 5) Change the sixth gene from 6 to 5. This solves the duplicates. + But this is NOT SUPPORTED yet. + We support changing only a single gene that makes a room to solve the duplicates. 
+ + Let's explain it using this solution: [1, 2, 2, 4, 5, 6, 6, 7, 8, 9] + It has 2 genes with the value 2 at indices 1 and 2. + This is how the duplicates are solved: + 1) Change the first gene from 1 to 0. + 2) Change the second gene from 2 to 1. This solves the duplicates. + The result is [0, 1, 2, 4, 5, 6, 6, 7, 8, 9] + """ + # num_duplicates = number_duplicate_genes(gene_space=[[0, 1], + # [1, 2], + # [2, 3], + # [3, 4], + # [4, 5], + # [5, 6], + # [6, 7], + # [7, 8], + # [8, 9], + # [9, 10]], + # gene_type=[int, int, int, int, int, int, int, int, int, int], + # num_genes=10) + + # assert num_duplicates == 0 + +def test_number_duplicates_nested_gene_space_nested_gene_type_initial_population(): + num_duplicates = number_duplicate_genes(gene_space=[[0, 1], + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10]], + gene_type=[int, int, int, int, int, int, int, int, int, int], + num_genes=10, + initial_population=initial_population) + + assert num_duplicates == 0 + if __name__ == "__main__": - # print() - # test_number_duplicates_default() - # print() - # test_number_duplicates_float_gene_type() - # print() - # test_number_duplicates_int_gene_type() - # print() - # test_number_duplicates_single_gene_space() - # print() - # test_number_duplicates_single_range_gene_space() - # print() - # test_number_duplicates_single_numpy_range_gene_space() print() + test_number_duplicates_default() + print() + test_number_duplicates_default_initial_population() + print() + + test_number_duplicates_float_gene_type() + print() + test_number_duplicates_float_gene_type_initial_population() + print() + + test_number_duplicates_int_gene_type() + print() + test_number_duplicates_int_gene_type_initial_population() + print() + + test_number_duplicates_int_gene_type_replacement() + print() + test_number_duplicates_int_gene_type_replacement_initial_population() + print() + + test_number_duplicates_single_gene_space() + print() + 
test_number_duplicates_single_gene_space_initial_population() + print() + + test_number_duplicates_single_range_gene_space() + print() + test_number_duplicates_single_range_gene_space_initial_population() + print() + + test_number_duplicates_single_numpy_range_gene_space() + print() + test_number_duplicates_single_numpy_range_gene_space_initial_population() + print() + test_number_duplicates_nested_gene_space() print() - + test_number_duplicates_nested_gene_space_initial_population() + print() + + # This example causes duplicates that can only be solved by changing a chain of genes. + # test_number_duplicates_nested_gene_space_nested_gene_type() + # print() + test_number_duplicates_nested_gene_space_nested_gene_type_initial_population() + print() + diff --git a/tests/test_gene_space.py b/tests/test_gene_space.py new file mode 100644 index 0000000..bdda870 --- /dev/null +++ b/tests/test_gene_space.py @@ -0,0 +1,345 @@ +import pygad +import random +import numpy + +num_generations = 100 + +initial_population = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] + +# Test single gene space with nested gene type. 
+ +def number_respect_gene_space(gene_space=None, + gene_type=float, + num_genes=10, + mutation_by_replacement=False, + random_mutation_min_val=-1, + random_mutation_max_val=1, + init_range_low=-4, + init_range_high=4, + initial_population=None): + + def fitness_func(ga, solution, idx): + return random.random() + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=fitness_func, + sol_per_pop=10, + num_genes=num_genes, + gene_space=gene_space, + gene_type=gene_type, + initial_population=initial_population, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val, + allow_duplicate_genes=True, + mutation_by_replacement=mutation_by_replacement, + save_solutions=True, + suppress_warnings=True) + + ga_instance.run() + ga_instance.solutions = numpy.array(ga_instance.solutions, + dtype=object) + + # gene_space_unpacked = ga_instance.unpack_gene_space(num_values_from_inf_range=100) + num_outside = 0 + if ga_instance.gene_space_nested == True: + for gene_idx in range(ga_instance.num_genes): + all_gene_values = ga_instance.solutions[:, gene_idx] + if type(ga_instance.gene_space[gene_idx]) in [list, tuple, range, numpy.ndarray]: + current_gene_space = list(ga_instance.gene_space[gene_idx]) + for val in all_gene_values: + if val in current_gene_space: + # print(val, current_gene_space) + pass + else: + # print(gene_idx, val, current_gene_space) + num_outside += 1 + elif type(ga_instance.gene_space[gene_idx]) is dict: + if not "step" in ga_instance.gene_space[gene_idx].keys(): + for val in all_gene_values: + if val >= ga_instance.gene_space[gene_idx]["low"] and val < ga_instance.gene_space[gene_idx]["high"]: + pass + else: + num_outside += 1 + else: + gene_space_values = numpy.arange(ga_instance.gene_space[gene_idx]["low"], + ga_instance.gene_space[gene_idx]["high"], + ga_instance.gene_space[gene_idx]["step"]) + for val in 
all_gene_values: + if val in gene_space_values: + pass + else: + num_outside += 1 + elif type(ga_instance.gene_space[gene_idx]) in ga_instance.supported_int_float_types: + for val in all_gene_values: + if val == ga_instance.gene_space[gene_idx]: + pass + else: + num_outside += 1 + else: + for gene_idx in range(ga_instance.num_genes): + all_gene_values = ga_instance.solutions[:, gene_idx] + # print("all_gene_values", gene_idx, all_gene_values) + if type(ga_instance.gene_space) in [list, tuple, range, numpy.ndarray]: + current_gene_space = list(ga_instance.gene_space) + for val in all_gene_values: + if val in current_gene_space: + pass + else: + num_outside += 1 + elif type(ga_instance.gene_space) is dict: + if not "step" in ga_instance.gene_space.keys(): + for val in all_gene_values: + if val >= ga_instance.gene_space["low"] and val < ga_instance.gene_space["high"]: + pass + else: + num_outside += 1 + else: + gene_space_values = numpy.arange(ga_instance.gene_space["low"], + ga_instance.gene_space["high"], + ga_instance.gene_space["step"]) + for val in all_gene_values: + if val in gene_space_values: + pass + else: + num_outside += 1 + + print("Number of outside range is {num_outside}.".format(num_outside=num_outside)) + return num_outside, ga_instance + +def test_gene_space_range(): + num_outside, _ = number_respect_gene_space(gene_space=range(10)) + + assert num_outside == 0 + +def test_gene_space_numpy_arange(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.arange(10)) + + assert num_outside == 0 + +def test_gene_space_list(): + num_outside, _ = number_respect_gene_space(gene_space=list(range(10))) + + assert num_outside == 0 + +def test_gene_space_numpy(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.array(list(range(10)))) + + assert num_outside == 0 + +def test_gene_space_dict_without_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10}) + # print(ga_instance.population) + + assert 
num_outside == 0 + +def test_gene_space_dict_with_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10, "step": 2}) + + assert num_outside == 0 + +def test_gene_space_list_single_value(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[5]) + + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_range(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[range(0, 10), + range(10, 20), + range(20, 30), + range(30, 40), + range(40, 50), + range(50, 60), + range(60, 70), + range(70, 80), + range(80, 90), + range(90, 100)]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_dict_without_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10}, + {"low": 10, "high": 20}, + {"low": 20, "high": 30}, + {"low": 30, "high": 40}, + {"low": 40, "high": 50}, + {"low": 50, "high": 60}, + {"low": 60, "high": 70}, + {"low": 70, "high": 80}, + {"low": 80, "high": 90}, + {"low": 90, "high": 100}]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_dict_with_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10, "step": 1}, + {"low": 10, "high": 20, "step": 1.5}, + {"low": 20, "high": 30, "step": 2}, + {"low": 30, "high": 40, "step": 2.5}, + {"low": 40, "high": 50, "step": 3}, + {"low": 50, "high": 60, "step": 3.5}, + {"low": 60, "high": 70, "step": 4}, + {"low": 70, "high": 80, "step": 4.5}, + {"low": 80, "high": 90, "step": 5}, + {"low": 90, "high": 100, "step": 5.5}]) + # print(ga_instance.population) + + assert num_outside == 0 + + +def test_nested_gene_space_numpy_arange(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[numpy.arange(0, 10), + numpy.arange(10, 20), + numpy.arange(20, 30), + numpy.arange(30, 40), + numpy.arange(40, 50), + numpy.arange(50, 60), + numpy.arange(60, 70), + 
numpy.arange(70, 80), + numpy.arange(80, 90), + numpy.arange(90, 100)]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_list(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [-10, 10, 20, 30, 40, 50, 60, 70, 80, 90], + [-11, 11, 22, 33, 44, 55, 66, 77, 88, 99], + [-100, 100, 200, 300, 400, 500, 600, 700, 800, 900], + [-4.1, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + [-5.1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9], + [-10.5, 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9], + [-15, 15, 25, 35, 45, 55, 65, 75, 85, 95], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_list2(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1], + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10]]) + + assert num_outside == 0 + +def test_nested_gene_space_mix(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]], + gene_type=int) + + assert num_outside == 0 + +def test_nested_gene_space_mix_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_mix_initial_population(): + num_outside, 
ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + numpy.arange(0, 10), + range(0, 10), + {"low": 0, "high": 10}, + {"low": 00, "high": 10, "step": 1}, + range(0, 10), + numpy.arange(0, 10), + numpy.arange(0, 10), + {"low": 0, "high": 10}, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]], + initial_population=initial_population) + # print(ga_instance.population) + + assert num_outside == 0 + +if __name__ == "__main__": + print() + test_gene_space_range() + print() + + test_gene_space_numpy_arange() + print() + + test_gene_space_list() + print() + + test_gene_space_list_single_value() + print() + + test_gene_space_numpy() + print() + + test_gene_space_dict_without_step() + print() + + test_gene_space_dict_with_step() + print() + + test_nested_gene_space_range() + print() + + test_nested_gene_space_dict_without_step() + print() + + test_nested_gene_space_dict_with_step() + print() + + test_nested_gene_space_numpy_arange() + print() + + test_nested_gene_space_list() + print() + + test_nested_gene_space_list2() + print() + + test_nested_gene_space_mix() + print() + + test_nested_gene_space_mix_nested_gene_type() + print() + + test_nested_gene_space_mix_initial_population() + print() \ No newline at end of file From 6b941522548559f389b84e6487ea749eff49a9d7 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 2 May 2023 22:40:35 -0400 Subject: [PATCH 30/32] Fixes and more tests --- pygad/helper/unique.py | 70 ++-- pygad/utils/mutation.py | 41 +- pygad/utils/parent_selection.py | 4 +- tests/test_gene_space.py | 47 ++- .../test_gene_space_allow_duplicate_genes.py | 391 ++++++++++++++++++ 5 files changed, 497 insertions(+), 56 deletions(-) create mode 100644 tests/test_gene_space_allow_duplicate_genes.py diff --git a/pygad/helper/unique.py b/pygad/helper/unique.py index 3224445..ad4bd8a 100644 --- a/pygad/helper/unique.py +++ 
b/pygad/helper/unique.py @@ -53,7 +53,7 @@ def solve_duplicate_genes_randomly(self, else: temp_val = numpy.random.uniform(low=min_val, high=max_val, - size=1) + size=1)[0] if mutation_by_replacement: pass else: @@ -69,7 +69,7 @@ def solve_duplicate_genes_randomly(self, else: temp_val = numpy.random.uniform(low=min_val, high=max_val, - size=1) + size=1)[0] if mutation_by_replacement: pass else: @@ -229,7 +229,7 @@ def unique_int_gene_from_range(self, # Note that we already know that the data type is integer. all_gene_values = numpy.asarray(all_gene_values, gene_type[gene_index][0]) - + values_to_select_from = list(set(all_gene_values) - set(solution)) if len(values_to_select_from) == 0: @@ -347,12 +347,12 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=low, high=high, - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. - if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space else: if gene_type[gene_idx][0] in pygad.GA.supported_int_types: if build_initial_pop == True: @@ -378,12 +378,12 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=low, high=high, - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. 
- if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space elif type(curr_gene_space) is dict: if self.gene_type_single == True: @@ -409,12 +409,12 @@ def unique_gene_by_space(self, else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. - if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space else: # Use index 0 to return the type from the list (e.g. [int, None] or [float, 2]). if gene_type[gene_idx][0] in pygad.GA.supported_int_types: @@ -439,12 +439,12 @@ def unique_gene_by_space(self, else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. - if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space else: # Selecting a value randomly based on the current gene's space in the 'gene_space' attribute. @@ -503,12 +503,12 @@ def unique_gene_by_space(self, else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. 
- if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space else: if gene_type[gene_idx][0] in pygad.GA.supported_int_types: if 'step' in self.gene_space.keys(): @@ -533,12 +533,12 @@ def unique_gene_by_space(self, else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] # TODO: Remove check for mutation_by_replacement when solving duplicates. Just replace the gene by the selected value from space. - if self.mutation_by_replacement: - pass - else: - value_from_space = solution[gene_idx] + value_from_space + # if self.mutation_by_replacement: + # pass + # else: + # value_from_space = solution[gene_idx] + value_from_space else: # If the space type is not of type dict, then a value is randomly selected from the gene_space attribute. @@ -562,7 +562,7 @@ def unique_gene_by_space(self, value_from_space = numpy.random.uniform(low=low, high=high, - size=1) + size=1)[0] # Similar to the round_genes() method in the pygad module, # Create a round_gene() method to round a single gene. 
@@ -722,7 +722,7 @@ def unpack_gene_space(self, for idx in none_indices: random_value = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] gene_space_unpacked[space_idx][idx] = random_value if self.gene_type_single == True: # self.gene_type_single diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index 9ce8722..e6b67ff 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -67,7 +67,7 @@ def mutation_by_space(self, offspring): elif curr_gene_space is None: rand_val = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] if self.mutation_by_replacement: value_from_space = rand_val else: @@ -82,7 +82,7 @@ def mutation_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] else: # Selecting a value randomly based on the current gene's space in the 'gene_space' attribute. # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. @@ -108,7 +108,7 @@ def mutation_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] else: # If the space type is not of type dict, then a value is randomly selected from the gene_space attribute. values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]])) @@ -120,9 +120,11 @@ def mutation_by_space(self, offspring): # value_from_space = random.choice(self.gene_space) if value_from_space is None: + # TODO: Return index 0. + # TODO: Check if this if statement is necessary. value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # Assinging the selected value from the space to the gene. 
if self.gene_type_single == True: @@ -135,6 +137,7 @@ def mutation_by_space(self, offspring): if not self.gene_type[gene_idx][1] is None: offspring[offspring_idx, gene_idx] = numpy.round(self.gene_type[gene_idx][0](value_from_space), self.gene_type[gene_idx][1]) + else: offspring[offspring_idx, gene_idx] = self.gene_type[gene_idx][0](value_from_space) @@ -172,7 +175,7 @@ def mutation_probs_by_space(self, offspring): elif curr_gene_space is None: rand_val = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] if self.mutation_by_replacement: value_from_space = rand_val else: @@ -187,7 +190,7 @@ def mutation_probs_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] else: # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. @@ -212,7 +215,7 @@ def mutation_probs_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] else: values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]])) @@ -257,7 +260,7 @@ def mutation_randomly(self, offspring): # Generating a random value. random_value = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. if self.mutation_by_replacement: if self.gene_type_single == True: @@ -312,7 +315,7 @@ def mutation_probs_randomly(self, offspring): # Generating a random value. 
random_value = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. if self.mutation_by_replacement: if self.gene_type_single == True: @@ -509,7 +512,7 @@ def adaptive_mutation_by_space(self, offspring): elif curr_gene_space is None: rand_val = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] if self.mutation_by_replacement: value_from_space = rand_val else: @@ -524,7 +527,7 @@ def adaptive_mutation_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] else: # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. @@ -549,7 +552,7 @@ def adaptive_mutation_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] else: values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]])) @@ -562,7 +565,7 @@ def adaptive_mutation_by_space(self, offspring): if value_from_space is None: value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # Assinging the selected value from the space to the gene. if self.gene_type_single == True: @@ -609,7 +612,7 @@ def adaptive_mutation_randomly(self, offspring): # Generating a random value. random_value = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. 
if self.mutation_by_replacement: if self.gene_type_single == True: @@ -685,7 +688,7 @@ def adaptive_mutation_probs_by_space(self, offspring): elif curr_gene_space is None: rand_val = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] if self.mutation_by_replacement: value_from_space = rand_val else: @@ -700,7 +703,7 @@ def adaptive_mutation_probs_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], - size=1) + size=1)[0] else: # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. # If the gene space has only 1 value, then select it. The old and new values of the gene are identical. @@ -725,7 +728,7 @@ def adaptive_mutation_probs_by_space(self, offspring): else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], - size=1) + size=1)[0] else: values_to_select_from = list(set(self.gene_space) - set([offspring[offspring_idx, gene_idx]])) @@ -737,7 +740,7 @@ def adaptive_mutation_probs_by_space(self, offspring): if value_from_space is None: value_from_space = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # Assinging the selected value from the space to the gene. if self.gene_type_single == True: @@ -786,7 +789,7 @@ def adaptive_mutation_probs_randomly(self, offspring): # Generating a random value. random_value = numpy.random.uniform(low=self.random_mutation_min_val, high=self.random_mutation_max_val, - size=1) + size=1)[0] # If the mutation_by_replacement attribute is True, then the random value replaces the current gene value. 
if self.mutation_by_replacement: if self.gene_type_single == True: diff --git a/pygad/utils/parent_selection.py b/pygad/utils/parent_selection.py index 7f32181..976e130 100644 --- a/pygad/utils/parent_selection.py +++ b/pygad/utils/parent_selection.py @@ -171,7 +171,9 @@ def stochastic_universal_selection(self, fitness, num_parents): probs[min_probs_idx] = 99999999999 pointers_distance = 1.0 / self.num_parents_mating # Distance between different pointers. - first_pointer = numpy.random.uniform(low=0.0, high=pointers_distance, size=1) # Location of the first pointer. + first_pointer = numpy.random.uniform(low=0.0, + high=pointers_distance, + size=1)[0] # Location of the first pointer. # Selecting the best individuals in the current generation as parents for producing the offspring of the next generation. if self.gene_type_single == True: diff --git a/tests/test_gene_space.py b/tests/test_gene_space.py index bdda870..09bc221 100644 --- a/tests/test_gene_space.py +++ b/tests/test_gene_space.py @@ -1,3 +1,8 @@ +""" +This script is identical to the test_gene_space_allow_duplicate_genes.py script except for: + Setting allow_duplicate_genes=False instead of True. 
+""" + import pygad import random import numpy @@ -45,7 +50,8 @@ def fitness_func(ga, solution, idx): allow_duplicate_genes=True, mutation_by_replacement=mutation_by_replacement, save_solutions=True, - suppress_warnings=True) + suppress_warnings=True, + random_seed=1) ga_instance.run() ga_instance.solutions = numpy.array(ga_instance.solutions, @@ -186,6 +192,22 @@ def test_nested_gene_space_dict_without_step(): assert num_outside == 0 +def test_nested_gene_space_dict_without_step_float_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10}, + {"low": 10, "high": 20}, + {"low": 20, "high": 30}, + {"low": 30, "high": 40}, + {"low": 40, "high": 50}, + {"low": 50, "high": 60}, + {"low": 60, "high": 70}, + {"low": 70, "high": 80}, + {"low": 80, "high": 90}, + {"low": 90, "high": 100}], + gene_type=[float, 3]) + # print(ga_instance.population) + + assert num_outside == 0 + def test_nested_gene_space_dict_with_step(): num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10, "step": 1}, {"low": 10, "high": 20, "step": 1.5}, @@ -294,6 +316,23 @@ def test_nested_gene_space_mix_initial_population(): assert num_outside == 0 +def test_nested_gene_space_mix_initial_population_single_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + numpy.arange(0, 10), + range(0, 10), + {"low": 0, "high": 10}, + {"low": 0, "high": 10}, + range(0, 10), + numpy.arange(0, 10), + numpy.arange(0, 10), + {"low": 0, "high": 10}, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + gene_type=[float, 2], + initial_population=initial_population) + # print(ga_instance.population) + + assert num_outside == 0 + if __name__ == "__main__": print() test_gene_space_range() @@ -323,6 +362,9 @@ def test_nested_gene_space_mix_initial_population(): test_nested_gene_space_dict_without_step() print() + test_nested_gene_space_dict_without_step_float_gene_type() + print() + 
test_nested_gene_space_dict_with_step() print() @@ -342,4 +384,7 @@ def test_nested_gene_space_mix_initial_population(): print() test_nested_gene_space_mix_initial_population() + print() + + test_nested_gene_space_mix_initial_population_single_gene_type() print() \ No newline at end of file diff --git a/tests/test_gene_space_allow_duplicate_genes.py b/tests/test_gene_space_allow_duplicate_genes.py new file mode 100644 index 0000000..804129a --- /dev/null +++ b/tests/test_gene_space_allow_duplicate_genes.py @@ -0,0 +1,391 @@ +""" +This script is identical to the test_gene_space.py script except for: + Setting allow_duplicate_genes=False instead of True. +""" + +import pygad +import random +import numpy + +num_generations = 100 + +initial_population = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] + +# Test single gene space with nested gene type. 
+ +def number_respect_gene_space(gene_space=None, + gene_type=float, + num_genes=10, + mutation_by_replacement=False, + random_mutation_min_val=-1, + random_mutation_max_val=1, + init_range_low=-4, + init_range_high=4, + initial_population=None): + + def fitness_func(ga, solution, idx): + return random.random() + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=fitness_func, + sol_per_pop=10, + num_genes=num_genes, + gene_space=gene_space, + gene_type=gene_type, + initial_population=initial_population, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val, + allow_duplicate_genes=False, + mutation_by_replacement=mutation_by_replacement, + save_solutions=True, + suppress_warnings=True, + random_seed=2) + + ga_instance.run() + ga_instance.solutions = numpy.array(ga_instance.solutions, + dtype=object) + + # gene_space_unpacked = ga_instance.unpack_gene_space(num_values_from_inf_range=100) + num_outside = 0 + if ga_instance.gene_space_nested == True: + for gene_idx in range(ga_instance.num_genes): + all_gene_values = ga_instance.solutions[:, gene_idx] + if type(ga_instance.gene_space[gene_idx]) in [list, tuple, range, numpy.ndarray]: + current_gene_space = list(ga_instance.gene_space[gene_idx]) + for val in all_gene_values: + if val in current_gene_space: + # print(val, current_gene_space) + pass + else: + # print(gene_idx, val, current_gene_space) + num_outside += 1 + elif type(ga_instance.gene_space[gene_idx]) is dict: + if not "step" in ga_instance.gene_space[gene_idx].keys(): + for val in all_gene_values: + if val >= ga_instance.gene_space[gene_idx]["low"] and val < ga_instance.gene_space[gene_idx]["high"]: + pass + else: + print(gene_idx, val, current_gene_space, all_gene_values) + num_outside += 1 + else: + gene_space_values = numpy.arange(ga_instance.gene_space[gene_idx]["low"], + 
ga_instance.gene_space[gene_idx]["high"], + ga_instance.gene_space[gene_idx]["step"]) + for val in all_gene_values: + if val in gene_space_values: + pass + else: + num_outside += 1 + elif type(ga_instance.gene_space[gene_idx]) in ga_instance.supported_int_float_types: + for val in all_gene_values: + if val == ga_instance.gene_space[gene_idx]: + pass + else: + num_outside += 1 + else: + for gene_idx in range(ga_instance.num_genes): + all_gene_values = ga_instance.solutions[:, gene_idx] + # print("all_gene_values", gene_idx, all_gene_values) + if type(ga_instance.gene_space) in [list, tuple, range, numpy.ndarray]: + current_gene_space = list(ga_instance.gene_space) + for val in all_gene_values: + if val in current_gene_space: + pass + else: + num_outside += 1 + elif type(ga_instance.gene_space) is dict: + if not "step" in ga_instance.gene_space.keys(): + for val in all_gene_values: + if val >= ga_instance.gene_space["low"] and val < ga_instance.gene_space["high"]: + pass + else: + num_outside += 1 + else: + gene_space_values = numpy.arange(ga_instance.gene_space["low"], + ga_instance.gene_space["high"], + ga_instance.gene_space["step"]) + for val in all_gene_values: + if val in gene_space_values: + pass + else: + num_outside += 1 + + print("Number of outside range is {num_outside}.".format(num_outside=num_outside)) + return num_outside, ga_instance + +def test_gene_space_range(): + num_outside, _ = number_respect_gene_space(gene_space=range(10)) + + assert num_outside == 0 + +def test_gene_space_numpy_arange(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.arange(10)) + + assert num_outside == 0 + +def test_gene_space_list(): + num_outside, _ = number_respect_gene_space(gene_space=list(range(10))) + + assert num_outside == 0 + +def test_gene_space_numpy(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.array(list(range(10)))) + + assert num_outside == 0 + +def test_gene_space_dict_without_step(): + num_outside, ga_instance = 
number_respect_gene_space(gene_space={"low": 0, "high": 10}) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_gene_space_dict_with_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10, "step": 2}) + + assert num_outside == 0 + +def test_gene_space_list_single_value(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[5]) + + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_range(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[range(0, 10), + range(10, 20), + range(20, 30), + range(30, 40), + range(40, 50), + range(50, 60), + range(60, 70), + range(70, 80), + range(80, 90), + range(90, 100)]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_dict_without_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10}, + {"low": 10, "high": 20}, + {"low": 20, "high": 30}, + {"low": 30, "high": 40}, + {"low": 40, "high": 50}, + {"low": 50, "high": 60}, + {"low": 60, "high": 70}, + {"low": 70, "high": 80}, + {"low": 80, "high": 90}, + {"low": 90, "high": 100}]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_dict_without_step_float_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10}, + {"low": 10, "high": 20}, + {"low": 20, "high": 30}, + {"low": 30, "high": 40}, + {"low": 40, "high": 50}, + {"low": 50, "high": 60}, + {"low": 60, "high": 70}, + {"low": 70, "high": 80}, + {"low": 80, "high": 90}, + {"low": 90, "high": 100}], + gene_type=[float, 3]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_dict_with_step(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[{"low": 0, "high": 10, "step": 1}, + {"low": 10, "high": 20, "step": 1.5}, + {"low": 20, "high": 30, "step": 2}, + {"low": 30, "high": 
40, "step": 2.5}, + {"low": 40, "high": 50, "step": 3}, + {"low": 50, "high": 60, "step": 3.5}, + {"low": 60, "high": 70, "step": 4}, + {"low": 70, "high": 80, "step": 4.5}, + {"low": 80, "high": 90, "step": 5}, + {"low": 90, "high": 100, "step": 5.5}]) + # print(ga_instance.population) + + assert num_outside == 0 + + +def test_nested_gene_space_numpy_arange(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[numpy.arange(0, 10), + numpy.arange(10, 20), + numpy.arange(20, 30), + numpy.arange(30, 40), + numpy.arange(40, 50), + numpy.arange(50, 60), + numpy.arange(60, 70), + numpy.arange(70, 80), + numpy.arange(80, 90), + numpy.arange(90, 100)]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_list(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [-10, 10, 20, 30, 40, 50, 60, 70, 80, 90], + [-11, 11, 22, 33, 44, 55, 66, 77, 88, 99], + [-100, 100, 200, 300, 400, 500, 600, 700, 800, 900], + [-4.1, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + [-5.1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9], + [-10.5, 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9], + [-15, 15, 25, 35, 45, 55, 65, 75, 85, 95], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_list2(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1], + [1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6], + [6, 7], + [7, 8], + [8, 9], + [9, 10]]) + + assert num_outside == 0 + +def test_nested_gene_space_mix(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]], + gene_type=int) + + assert num_outside == 0 + 
+def test_nested_gene_space_mix_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_mix_initial_population(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + numpy.arange(0, 10), + range(0, 10), + {"low": 0, "high": 10}, + {"low": 00, "high": 10, "step": 1}, + range(0, 10), + numpy.arange(0, 10), + numpy.arange(0, 10), + {"low": 0, "high": 10}, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]], + initial_population=initial_population) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_nested_gene_space_mix_initial_population_single_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + numpy.arange(0, 10), + range(0, 10), + {"low": 0, "high": 10}, + {"low": 0, "high": 10}, + range(0, 10), + numpy.arange(0, 10), + numpy.arange(0, 10), + {"low": 0, "high": 10}, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], + gene_type=[float, 2], + initial_population=initial_population) + # print(ga_instance.population) + + assert num_outside == 0 + +if __name__ == "__main__": + print() + test_gene_space_range() + print() + + test_gene_space_numpy_arange() + print() + + test_gene_space_list() + print() + + test_gene_space_list_single_value() + print() + + test_gene_space_numpy() + print() + + test_gene_space_dict_without_step() + print() + + test_gene_space_dict_with_step() + print() + + 
test_nested_gene_space_range() + print() + + test_nested_gene_space_dict_without_step() + print() + + test_nested_gene_space_dict_without_step_float_gene_type() + print() + + test_nested_gene_space_dict_with_step() + print() + + test_nested_gene_space_numpy_arange() + print() + + test_nested_gene_space_list() + print() + + test_nested_gene_space_list2() + print() + + test_nested_gene_space_mix() + print() + + test_nested_gene_space_mix_nested_gene_type() + print() + + test_nested_gene_space_mix_initial_population() + print() + + test_nested_gene_space_mix_initial_population_single_gene_type() + print() \ No newline at end of file From 2f139d3e2c226ce4ccb5cdea37f695c7b549527f Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 3 May 2023 00:38:54 -0400 Subject: [PATCH 31/32] Fixes and more tests --- pygad/utils/mutation.py | 25 ++- tests/test_crossover_mutation.py | 188 ++++++++++++++++++ tests/test_gene_space.py | 63 +++++- .../test_gene_space_allow_duplicate_genes.py | 100 ++++++++-- 4 files changed, 345 insertions(+), 31 deletions(-) create mode 100644 tests/test_crossover_mutation.py diff --git a/pygad/utils/mutation.py b/pygad/utils/mutation.py index e6b67ff..326ba6b 100644 --- a/pygad/utils/mutation.py +++ b/pygad/utils/mutation.py @@ -75,10 +75,13 @@ def mutation_by_space(self, offspring): elif type(curr_gene_space) is dict: # The gene's space of type dict specifies the lower and upper limits of a gene. if 'step' in curr_gene_space.keys(): + # The numpy.random.choice() and numpy.random.uniform() functions return a NumPy array as the output even if the array has a single value. + # We have to return the output at index 0 to force a numeric value to be returned not an object of type numpy.ndarray. + # If numpy.ndarray is returned, then it will cause an issue later while using the set() function. 
value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], stop=curr_gene_space['high'], step=curr_gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], @@ -104,7 +107,7 @@ def mutation_by_space(self, offspring): value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], stop=self.gene_space['high'], step=self.gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], @@ -186,7 +189,7 @@ def mutation_probs_by_space(self, offspring): value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], stop=curr_gene_space['high'], step=curr_gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], @@ -211,7 +214,7 @@ def mutation_probs_by_space(self, offspring): value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], stop=self.gene_space['high'], step=self.gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], @@ -520,10 +523,13 @@ def adaptive_mutation_by_space(self, offspring): elif type(curr_gene_space) is dict: # Selecting a value randomly from the current gene's space in the 'gene_space' attribute. if 'step' in curr_gene_space.keys(): + # The numpy.random.choice() and numpy.random.uniform() functions return a NumPy array as the output even if the array has a single value. + # We have to return the output at index 0 to force a numeric value to be returned not an object of type numpy.ndarray. + # If numpy.ndarray is returned, then it will cause an issue later while using the set() function. 
value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], stop=curr_gene_space['high'], step=curr_gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], @@ -548,7 +554,7 @@ def adaptive_mutation_by_space(self, offspring): value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], stop=self.gene_space['high'], step=self.gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], @@ -699,7 +705,7 @@ def adaptive_mutation_probs_by_space(self, offspring): value_from_space = numpy.random.choice(numpy.arange(start=curr_gene_space['low'], stop=curr_gene_space['high'], step=curr_gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=curr_gene_space['low'], high=curr_gene_space['high'], @@ -721,10 +727,13 @@ def adaptive_mutation_probs_by_space(self, offspring): # Selecting a value randomly from the global gene space in the 'gene_space' attribute. if type(self.gene_space) is dict: if 'step' in self.gene_space.keys(): + # The numpy.random.choice() and numpy.random.uniform() functions return a NumPy array as the output even if the array has a single value. + # We have to return the output at index 0 to force a numeric value to be returned not an object of type numpy.ndarray. + # If numpy.ndarray is returned, then it will cause an issue later while using the set() function. 
value_from_space = numpy.random.choice(numpy.arange(start=self.gene_space['low'], stop=self.gene_space['high'], step=self.gene_space['step']), - size=1) + size=1)[0] else: value_from_space = numpy.random.uniform(low=self.gene_space['low'], high=self.gene_space['high'], diff --git a/tests/test_crossover_mutation.py b/tests/test_crossover_mutation.py new file mode 100644 index 0000000..f65a795 --- /dev/null +++ b/tests/test_crossover_mutation.py @@ -0,0 +1,188 @@ +import pygad +import random +import numpy + +num_generations = 1 + +initial_population = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] + +def output_crossover_mutation(gene_space=None, + gene_type=float, + num_genes=10, + mutation_by_replacement=False, + random_mutation_min_val=-1, + random_mutation_max_val=1, + init_range_low=-4, + init_range_high=4, + initial_population=None, + crossover_probability=None, + mutation_probability=None, + crossover_type=None, + mutation_type=None): + + def fitness_func(ga, solution, idx): + return random.random() + + ga_instance = pygad.GA(num_generations=num_generations, + num_parents_mating=5, + fitness_func=fitness_func, + sol_per_pop=10, + num_genes=num_genes, + gene_space=gene_space, + gene_type=gene_type, + initial_population=initial_population, + init_range_low=init_range_low, + init_range_high=init_range_high, + random_mutation_min_val=random_mutation_min_val, + random_mutation_max_val=random_mutation_max_val, + allow_duplicate_genes=True, + mutation_by_replacement=mutation_by_replacement, + save_solutions=True, + crossover_probability=crossover_probability, + mutation_probability=mutation_probability, + crossover_type=crossover_type, + mutation_type=mutation_type, + 
suppress_warnings=True, + random_seed=1) + + ga_instance.run() + + comparison_result = [] + for solution_idx, solution in enumerate(ga_instance.population): + if list(solution) in ga_instance.initial_population.tolist(): + comparison_result.append(True) + else: + comparison_result.append(False) + + comparison_result = numpy.array(comparison_result) + result = numpy.all(comparison_result == True) + + print("Comparison result is {result}".format(result=result)) + return result, ga_instance + +def test_no_crossover_no_mutation(): + result, ga_instance = output_crossover_mutation() + + assert result == True + +def test_no_crossover_no_mutation_gene_space(): + result, ga_instance = output_crossover_mutation(gene_space=range(10)) + + assert result == True + +def test_no_crossover_no_mutation_int_gene_type(): + result, ga_instance = output_crossover_mutation(gene_type=int) + + assert result == True + + +def test_no_crossover_no_mutation_gene_space_gene_type(): + result, ga_instance = output_crossover_mutation(gene_space={"low": 0, "high": 10}, + gene_type=[float, 2]) + + assert result == True + + +def test_no_crossover_no_mutation_nested_gene_space(): + result, ga_instance = output_crossover_mutation(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]]) + assert result == True + +def test_no_crossover_no_mutation_nested_gene_type(): + result, ga_instance = output_crossover_mutation(gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert result == True + +def test_no_crossover_no_mutation_nested_gene_space_nested_gene_type(): + result, ga_instance = output_crossover_mutation(gene_space=[[0, 1, 2, 3, 4], + numpy.arange(5, 10), + range(10, 15), + {"low": 15, "high": 20}, + {"low": 20, "high": 30, "step": 2}, + None, + 
numpy.arange(30, 35), + numpy.arange(35, 40), + numpy.arange(40, 45), + [45, 46, 47, 48, 49]], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert result == True + +def test_no_crossover_no_mutation_initial_population(): + global initial_population + result, ga_instance = output_crossover_mutation(initial_population=initial_population) + + assert result == True + +def test_no_crossover_no_mutation_initial_population_nested_gene_type(): + global initial_population + result, ga_instance = output_crossover_mutation(initial_population=initial_population, + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert result == True + +def test_crossover_no_mutation_zero_crossover_probability(): + global initial_population + result, ga_instance = output_crossover_mutation(crossover_type="single_point", + crossover_probability=0.0) + + assert result == True + +def test_zero_crossover_probability_zero_mutation_probability(): + global initial_population + result, ga_instance = output_crossover_mutation(crossover_type="single_point", + crossover_probability=0.0, + mutation_type="random", + mutation_probability=0.0) + + assert result == True + +if __name__ == "__main__": + print() + test_no_crossover_no_mutation() + print() + + test_no_crossover_no_mutation_int_gene_type() + print() + + test_no_crossover_no_mutation_gene_space() + print() + + test_no_crossover_no_mutation_gene_space_gene_type() + print() + + test_no_crossover_no_mutation_nested_gene_space() + print() + + test_no_crossover_no_mutation_nested_gene_type() + print() + + test_no_crossover_no_mutation_initial_population() + print() + + test_no_crossover_no_mutation_initial_population_nested_gene_type() + print() + + test_crossover_no_mutation_zero_crossover_probability() + print() + + test_zero_crossover_probability_zero_mutation_probability() + print() + diff --git 
a/tests/test_gene_space.py b/tests/test_gene_space.py index 09bc221..063acf7 100644 --- a/tests/test_gene_space.py +++ b/tests/test_gene_space.py @@ -1,6 +1,6 @@ """ This script is identical to the test_gene_space_allow_duplicate_genes.py script except for: - Setting allow_duplicate_genes=False instead of True. + Setting allow_duplicate_genes=True instead of False. """ import pygad @@ -51,7 +51,7 @@ def fitness_func(ga, solution, idx): mutation_by_replacement=mutation_by_replacement, save_solutions=True, suppress_warnings=True, - random_seed=1) + random_seed=2) ga_instance.run() ga_instance.solutions = numpy.array(ga_instance.solutions, @@ -162,6 +162,51 @@ def test_gene_space_list_single_value(): assert num_outside == 0 +def test_gene_space_range_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=range(10), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_numpy_arange_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.arange(10), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_list_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=list(range(10)), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_numpy_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.array(list(range(10))), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_dict_without_step_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10}, + gene_type=[int, 
float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_gene_space_dict_with_step_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10, "step": 2}, + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_list_single_value_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[5], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + # print(ga_instance.population) + + assert num_outside == 0 + def test_nested_gene_space_range(): num_outside, ga_instance = number_respect_gene_space(gene_space=[range(0, 10), range(10, 20), @@ -337,24 +382,38 @@ def test_nested_gene_space_mix_initial_population_single_gene_type(): print() test_gene_space_range() print() + test_gene_space_range_nested_gene_type() + print() test_gene_space_numpy_arange() print() + test_gene_space_numpy_arange_nested_gene_type() + print() test_gene_space_list() print() + test_gene_space_list_nested_gene_type() + print() test_gene_space_list_single_value() print() + test_gene_space_list_single_value_nested_gene_type() + print() test_gene_space_numpy() print() + test_gene_space_numpy_nested_gene_type() + print() test_gene_space_dict_without_step() print() + test_gene_space_dict_without_step_nested_gene_type() + print() test_gene_space_dict_with_step() print() + test_gene_space_dict_with_step_nested_gene_type() + print() test_nested_gene_space_range() print() diff --git a/tests/test_gene_space_allow_duplicate_genes.py b/tests/test_gene_space_allow_duplicate_genes.py index 804129a..2a65f1a 100644 --- a/tests/test_gene_space_allow_duplicate_genes.py +++ b/tests/test_gene_space_allow_duplicate_genes.py @@ -77,7 +77,6 @@ 
def fitness_func(ga, solution, idx): if val >= ga_instance.gene_space[gene_idx]["low"] and val < ga_instance.gene_space[gene_idx]["high"]: pass else: - print(gene_idx, val, current_gene_space, all_gene_values) num_outside += 1 else: gene_space_values = numpy.arange(ga_instance.gene_space[gene_idx]["low"], @@ -163,6 +162,51 @@ def test_gene_space_list_single_value(): assert num_outside == 0 +def test_gene_space_range_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=range(10), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_numpy_arange_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.arange(10), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_list_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=list(range(10)), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_numpy_nested_gene_type(): + num_outside, _ = number_respect_gene_space(gene_space=numpy.array(list(range(10))), + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_dict_without_step_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10}, + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + # print(ga_instance.population) + + assert num_outside == 0 + +def test_gene_space_dict_with_step_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space={"low": 0, "high": 10, "step": 2}, + 
gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + assert num_outside == 0 + +def test_gene_space_list_single_value_nested_gene_type(): + num_outside, ga_instance = number_respect_gene_space(gene_space=[5], + gene_type=[int, float, numpy.float64, [float, 3], [float, 4], numpy.int16, [numpy.float32, 1], int, float, [float, 3]]) + + # print(ga_instance.population) + + assert num_outside == 0 + def test_nested_gene_space_range(): num_outside, ga_instance = number_respect_gene_space(gene_space=[range(0, 10), range(10, 20), @@ -335,26 +379,40 @@ def test_nested_gene_space_mix_initial_population_single_gene_type(): assert num_outside == 0 if __name__ == "__main__": - print() - test_gene_space_range() - print() - - test_gene_space_numpy_arange() - print() - - test_gene_space_list() - print() - - test_gene_space_list_single_value() - print() - - test_gene_space_numpy() - print() - - test_gene_space_dict_without_step() - print() - - test_gene_space_dict_with_step() + # print() + # test_gene_space_range() + # print() + # test_gene_space_range_nested_gene_type() + # print() + + # test_gene_space_numpy_arange() + # print() + # test_gene_space_numpy_arange_nested_gene_type() + # print() + + # test_gene_space_list() + # print() + # test_gene_space_list_nested_gene_type() + # print() + + # test_gene_space_list_single_value() + # print() + # test_gene_space_list_single_value_nested_gene_type() + # print() + + # test_gene_space_numpy() + # print() + # test_gene_space_numpy_nested_gene_type() + # print() + + # test_gene_space_dict_without_step() + # print() + # test_gene_space_dict_without_step_nested_gene_type() + # print() + + # test_gene_space_dict_with_step() + # print() + test_gene_space_dict_with_step_nested_gene_type() print() test_nested_gene_space_range() From d8b637209401f1e3d56cec38bf74bc30ab21fe88 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Wed, 3 May 2023 12:26:29 -0400 Subject: [PATCH 32/32] 
Test lifecycle without crossover & mutation --- tests/test_lifecycle_callbacks_calls.py | 76 +++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/tests/test_lifecycle_callbacks_calls.py b/tests/test_lifecycle_callbacks_calls.py index 65b38c5..a6cce83 100644 --- a/tests/test_lifecycle_callbacks_calls.py +++ b/tests/test_lifecycle_callbacks_calls.py @@ -3,7 +3,9 @@ num_generations = 100 def number_lifecycle_callback_functions_calls(stop_criteria=None, - on_generation_stop=None): + on_generation_stop=None, + crossover_type="single_point", + mutation_type="random"): actual_num_callbacks_calls = 0 def fitness_func(ga_instanse, solution, solution_idx): @@ -46,6 +48,8 @@ def on_stop(ga_instance, last_population_fitness): fitness_func=fitness_func, sol_per_pop=10, num_genes=5, + crossover_type=crossover_type, + mutation_type=mutation_type, on_start=on_start, on_fitness=on_fitness, on_parents=on_parents, @@ -71,12 +75,14 @@ def on_stop(ga_instance, last_population_fitness): # Use 'generations_completed' instead of 'num_generations' because the evolution may stops in the on_generation() callback. 
expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 - print("Expected number of callbacks calls is {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) - print("Actual number of callbacks calls is {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + print("Expected {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) + print("Actual {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) return actual_num_callbacks_calls, expected_num_callbacks_calls def number_lifecycle_callback_methods_calls(stop_criteria=None, - on_generation_stop=None): + on_generation_stop=None, + crossover_type="single_point", + mutation_type="random"): actual_num_callbacks_calls = 0 class Callbacks: @@ -121,6 +127,8 @@ def on_stop(self, ga_instance, last_population_fitness): fitness_func=Callbacks_obj.fitness_func, sol_per_pop=10, num_genes=5, + crossover_type=crossover_type, + mutation_type=mutation_type, on_start=Callbacks_obj.on_start, on_fitness=Callbacks_obj.on_fitness, on_parents=Callbacks_obj.on_parents, @@ -146,8 +154,8 @@ def on_stop(self, ga_instance, last_population_fitness): # Use 'generations_completed' instead of 'num_generations' because the evolution may stops in the on_generation() callback. 
expected_num_callbacks_calls = 1 + ga_instance.generations_completed * 5 + 1 - print("Expected number of callbacks calls is {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) - print("Actual number of callbacks calls is {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) + print("Expected {expected_num_callbacks_calls}.".format(expected_num_callbacks_calls=expected_num_callbacks_calls)) + print("Actual {actual_num_callbacks_calls}.".format(actual_num_callbacks_calls=actual_num_callbacks_calls)) return actual_num_callbacks_calls, expected_num_callbacks_calls def test_number_lifecycle_callback_functions_calls(): @@ -170,13 +178,69 @@ def test_number_lifecycle_callback_methods_calls_stop_criteria(): assert actual == expected +def test_number_lifecycle_callback_functions_calls_no_crossover(): + actual, expected = number_lifecycle_callback_functions_calls(crossover_type=None) + + assert actual == expected + +def test_number_lifecycle_callback_functions_calls_no_mutation(): + actual, expected = number_lifecycle_callback_functions_calls(mutation_type=None) + + assert actual == expected + +def test_number_lifecycle_callback_functions_calls_no_crossover_no_mutation(): + actual, expected = number_lifecycle_callback_functions_calls(crossover_type=None, + mutation_type=None) + + assert actual == expected + +def test_number_lifecycle_callback_methods_calls_no_crossover(): + actual, expected = number_lifecycle_callback_methods_calls(crossover_type=None) + + assert actual == expected + +def test_number_lifecycle_callback_methods_calls_no_mutation(): + actual, expected = number_lifecycle_callback_methods_calls(mutation_type=None) + + assert actual == expected + +def test_number_lifecycle_callback_methods_calls_no_crossover_no_mutation(): + actual, expected = number_lifecycle_callback_methods_calls(crossover_type=None, + mutation_type=None) + + assert actual == expected + if __name__ == 
"__main__": print() test_number_lifecycle_callback_functions_calls() print() + test_number_lifecycle_callback_functions_calls_stop_criteria() print() + test_number_lifecycle_callback_methods_calls() print() + test_number_lifecycle_callback_methods_calls_stop_criteria() print() + + test_number_lifecycle_callback_functions_calls_no_crossover() + print() + + test_number_lifecycle_callback_functions_calls_no_crossover() + print() + + test_number_lifecycle_callback_functions_calls_no_mutation() + print() + + test_number_lifecycle_callback_functions_calls_no_crossover_no_mutation() + print() + + test_number_lifecycle_callback_methods_calls_no_crossover() + print() + + test_number_lifecycle_callback_methods_calls_no_mutation() + print() + + test_number_lifecycle_callback_methods_calls_no_crossover_no_mutation() + print()