import torch
from typing import List
import numpy as np
from rlbench.backend.observation import Observation


class LearningAgent():
    """
    General purpose class to abstract the functionality of the network from the agent.
    Use this as a base class to create different learning-based agents that can work with,
    and be trained on, different deep learning algorithms.
    """
    def __init__(self, collect_gradients=False):
        self.learning_rate = None
        self.neural_network = None
        self.optimizer = None
        self.loss_function = None
        self.training_data = None
        self.logger = None
        self.input_state = None
        self.output_action = None
        self.total_train_size = None  # Marks the size of the training data for the agent.
        self.collect_gradients = collect_gradients
        self.gradients = {
            'max': [],
            'avg': [],
            'layer': []
        }
        self.print_every = 40

    def injest_demonstrations(self, demos: List[List[Observation]], **kwargs):
        raise NotImplementedError()

    def train_agent(self, epochs: int):
        raise NotImplementedError()

    # Kept as a list of Observation to keep flexibility for LSTM-type networks.
    def predict_action(self, demonstration_episode: List[Observation], **kwargs) -> np.ndarray:
        """
        This should use model.eval() in PyTorch to predict an action.
        It is meant for use with a saved model.
        It should also ensure that the returned numpy array has the same dimension
        as the ActionMode + 1 (gripper open/close).
        """
        raise NotImplementedError()

    def save_model(self, file_path):
        """
        This will be used to save the model for whichever type of agent (TF/Torch).
        """
        raise NotImplementedError()

    def load_model(self, file_path):
        """
        This will be used to load the model from a file.
        """
        raise NotImplementedError()

    def load_model_from_object(self, state_dict):
        """
        This will be used to load the model from a dictionary.
        """
        raise NotImplementedError()


class RLAgent(LearningAgent):
    def __init__(self, warmup=500, **kwargs):
        self.warmup = warmup
        self.is_training = False
        super(RLAgent, self).__init__(**kwargs)

    def observe(self, state_t1: List[Observation], action_t, reward_t: int, done: bool):
        """
        Manages replay storage.
        Will be called after the agent takes a step and the reward is recorded from the env.
        Receives state s_t+1, action a_t and reward r_t.
        """
        raise NotImplementedError()

    def update(self):
        """
        Used by RL agents to actually update the policy.
        This lets PyTorch run gradient descent based on rewards when running the network.
        """
        raise NotImplementedError()

    def act(self, state: List[Observation], **kwargs):
        """
        Used by RL agents to act on state `s_t`.
        This method is used in coherence with `observe`, which receives `s_t+1` as input.
        This lets PyTorch hold gradients when running the network.
        """
        raise NotImplementedError()

    def reset(self, state: List[Observation], **kwargs):
        """
        Resets the state on termination of an episode.
        Ensures the agent captures the termination/completion conditions of an episode.
        """
        raise NotImplementedError()


class TorchAgent(LearningAgent):
    def __init__(self, **kwargs):
        super(TorchAgent, self).__init__(**kwargs)

    def save_model(self, file_path):
        if not self.neural_network:
            return
        # Move to CPU first so the checkpoint can be loaded on machines without a GPU.
        self.neural_network.to('cpu')
        torch.save(self.neural_network.state_dict(), file_path)

    def load_model(self, file_path):
        if not self.neural_network:
            return
        # Load a model from the given file path.
        self.neural_network.load_state_dict(torch.load(file_path))

    def load_model_from_object(self, state_dict):
        if not self.neural_network:
            return
        self.neural_network.load_state_dict(state_dict)

    # Expects named params from a Torch nn.Module.
    def set_gradients(self, named_parameters):
        """
        Records per-layer average and max absolute gradients (non-bias parameters only)
        into self.gradients for later inspection/plotting.
        """
        avg_grads = []
        max_grads = []
        layers = []
        for n, p in named_parameters:
            if p.requires_grad and ("bias" not in n):
                layers.append(n)
                avg_grads.append(p.grad.abs().mean())
                max_grads.append(p.grad.abs().max())
        self.gradients['max'].append(max_grads)
        self.gradients['avg'].append(avg_grads)
        self.gradients['layer'].append(layers)


class TorchRLAgent(TorchAgent, RLAgent):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
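

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original API): a minimal concrete
# imitation-style agent showing how a subclass could satisfy the TorchAgent
# contract. The class name `ExampleMLPAgent`, the network sizes, and the
# (state, action) feature layout chosen from Observation are hypothetical
# assumptions made only for demonstration.
# ---------------------------------------------------------------------------
class ExampleMLPAgent(TorchAgent):
    def __init__(self, state_dim=7, action_dim=8, learning_rate=1e-3, **kwargs):
        super(ExampleMLPAgent, self).__init__(**kwargs)
        self.learning_rate = learning_rate
        self.neural_network = torch.nn.Sequential(
            torch.nn.Linear(state_dim, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, action_dim),
        )
        self.optimizer = torch.optim.Adam(self.neural_network.parameters(), lr=learning_rate)
        self.loss_function = torch.nn.MSELoss()

    def injest_demonstrations(self, demos: List[List[Observation]], **kwargs):
        # Assumed feature layout: joint positions as the state, joint velocities
        # plus gripper open/close as the supervised target. A real agent would
        # pick its own features and ActionMode-compatible targets.
        states, actions = [], []
        for episode in demos:
            for obs in episode:
                states.append(np.asarray(obs.joint_positions, dtype=np.float32))
                actions.append(np.concatenate(
                    [obs.joint_velocities, [obs.gripper_open]]).astype(np.float32))
        self.training_data = (torch.from_numpy(np.stack(states)),
                              torch.from_numpy(np.stack(actions)))
        self.total_train_size = len(states)

    def train_agent(self, epochs: int):
        states, actions = self.training_data
        for _ in range(epochs):
            self.optimizer.zero_grad()
            loss = self.loss_function(self.neural_network(states), actions)
            loss.backward()
            # Optionally record gradient statistics before the optimizer step.
            if self.collect_gradients:
                self.set_gradients(self.neural_network.named_parameters())
            self.optimizer.step()

    def predict_action(self, demonstration_episode: List[Observation], **kwargs) -> np.ndarray:
        # Uses model.eval() and no_grad for inference, returning an array of
        # size ActionMode + 1 (gripper open/close), as the base class requires.
        self.neural_network.eval()
        with torch.no_grad():
            state = torch.from_numpy(
                np.asarray(demonstration_episode[-1].joint_positions, dtype=np.float32))
            return self.neural_network(state).numpy()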