import torch
import torch.nn as nn


##########################################################################################################
# MLP ACTOR-CRITIC #
##########################################################################################################

def mlp(sizes, activation, output_activation=nn.Identity):
    '''
    Create a multi-layer perceptron model from input sizes and activations
    ...
    '''


# (The body of mlp and the MLPActor / MLPCritic class definitions are omitted
#  from this excerpt.)

class MLPActorCritic(nn.Module):  # class name inferred from the MLPActor / MLPCritic members below
    def __init__(self, observation_space, action_space, hidden_sizes=(256, 256),
                 activation=nn.ReLU, device='cpu'):  # signature tail inferred from CNNActorCritic below
        # ... (docstring and obs_dim / act_dim / act_limit setup omitted) ...
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit).to(device)
        self.q = MLPCritic(obs_dim, act_dim, hidden_sizes, activation).to(device)

    def act(self, obs):
        # Compute an action without tracking gradients (used when interacting with the environment)
        with torch.no_grad():
            return self.pi(obs).cpu().numpy()

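# Example (illustrative sketch, not part of the original file): `act` expects a
# torch.Tensor observation already on the model's device and returns a numpy action.
# `env` and the MLPActorCritic signature above are assumptions here.
#
#   ac = MLPActorCritic(env.observation_space, env.action_space)
#   obs = torch.as_tensor(env.reset(), dtype=torch.float32)
#   action = ac.act(obs)   # numpy array, entries in [-act_limit, act_limit]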

##########################################################################################################
# CNN ACTOR-CRITIC #
##########################################################################################################

def cnn(in_channels, conv_layer_sizes, activation, batchnorm=True):
    '''
    Create a Convolutional Neural Network from the given conv layer specifications.
    Each layer's output channels, kernel size, and stride are taken from
    conv_layer_sizes; for example, kernel_size=2 with stride=2 halves the
    spatial dimensions at each layer.
    Args:
        in_channels (int): number of incoming channels
        conv_layer_sizes (list): list of 3-tuples consisting of
            (output_channel, kernel_size, stride), one per conv layer
        activation (nn.modules.activation): activation function after each conv layer
        batchnorm (bool): if True, add a BatchNorm2d layer after each activation
    Returns:
        nn.Sequential module for the CNN
    '''
    layers = []
    for out_channel, kernel, stride in conv_layer_sizes:
        layers += [nn.Conv2d(in_channels, out_channel, kernel, stride),
                   activation()]
        if batchnorm:
            layers += [nn.BatchNorm2d(out_channel)]
        in_channels = out_channel  # the next layer consumes this layer's output channels

    return nn.Sequential(*layers)
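# Example (illustrative sketch, not part of the original file): with kernel_size=2
# and stride=2, each layer halves the spatial dimensions while the channel count grows.
#
#   net = cnn(3, [(16, 2, 2), (32, 2, 2)], nn.ReLU)
#   net(torch.randn(1, 3, 128, 128)).shape   # -> torch.Size([1, 32, 32, 32])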


class CNNActor(nn.Module):
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation, act_limit):
        '''
        A Convolutional Neural Net for the Actor network.
        Network Architecture: (input) -> CNN -> MLP -> (output)
        Assumes the input is in the shape (H, W, C), e.g. (128, 128, 3).
        Args:
            obs_dim (tuple): observation dimension of the environment in the form (H, W, C)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                that describes the cnn architecture
            hidden_sizes (list): number of neurons in each layer of the MLP after the CNN output
            activation (nn.modules.activation): activation function for each layer of the MLP
            act_limit (float): the greatest magnitude possible for the action in the environment
        '''
        super().__init__()

        self.pi_cnn = cnn(obs_dim[2], conv_layer_sizes, nn.ReLU, batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.pi_cnn)
        mlp_sizes = [self.start_dim] + list(hidden_sizes) + [act_dim]
        self.pi_mlp = mlp(mlp_sizes, activation, output_activation=nn.Tanh)
        self.act_limit = act_limit

    def calc_shape(self, obs_dim, cnn_net):
        '''
        Determine the flattened size of the data after the conv layers,
        i.e. how many input neurons the MLP needs.
        '''
        H, W, C = obs_dim
        dummy_input = torch.randn(1, C, H, W)  # push one dummy image through the CNN
        with torch.no_grad():
            cnn_out = cnn_net(dummy_input)
        return cnn_out.view(-1).shape[0]

    def forward(self, obs):
        '''
        Forward propagation for the actor network.
        Args:
            obs (Tensor [n, C, H, W]): batch of observations from the environment
        Returns:
            output of the actor network, scaled by act_limit
        '''
        obs = self.pi_cnn(obs)
        obs = obs.view(-1, self.start_dim)  # flatten CNN features for the MLP
        obs = self.pi_mlp(obs)
        return obs * self.act_limit
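# Example (illustrative sketch, not part of the original file): the network consumes
# channels-first batches, so (H, W, C) observations must be permuted before the call.
#
#   actor = CNNActor((128, 128, 3), act_dim=2, conv_layer_sizes=[(16, 2, 2), (32, 2, 2)],
#                    hidden_sizes=(256, 256), activation=nn.ReLU, act_limit=1.0)
#   a = actor(torch.randn(4, 3, 128, 128))   # -> shape (4, 2), values in [-1, 1]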

class CNNCritic(nn.Module):
    def __init__(self, obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation):
        '''
        A Convolutional Neural Net for the Critic network.
        Args:
            obs_dim (tuple): observation dimension of the environment in the form (H, W, C)
            act_dim (int): action dimension of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                that describes the cnn architecture
            hidden_sizes (list): number of neurons in each layer of the MLP
            activation (nn.modules.activation): activation function for each layer of the MLP
        '''
        super().__init__()
        self.q_cnn = cnn(obs_dim[2], conv_layer_sizes, nn.ReLU, batchnorm=True)
        self.start_dim = self.calc_shape(obs_dim, self.q_cnn)
        # The action vector is concatenated with the flattened CNN features
        self.q_mlp = mlp([self.start_dim + act_dim] + list(hidden_sizes) + [1], activation)

    def calc_shape(self, obs_dim, cnn_net):
        '''
        Determine the flattened size of the data after the conv layers,
        i.e. how many input neurons the MLP needs (same dummy-forward trick as CNNActor).
        '''
        H, W, C = obs_dim
        dummy_input = torch.randn(1, C, H, W)
        with torch.no_grad():
            cnn_out = cnn_net(dummy_input)
        return cnn_out.view(-1).shape[0]

    def forward(self, obs, act):
        '''
        Forward propagation for the critic network.
        Args:
            obs (Tensor [n, C, H, W]): batch of observations from the environment
            act (Tensor [n, act_dim]): batch of actions taken by the actor
        Returns:
            Tensor [n]: Q-value for each (obs, act) pair
        '''
        obs = self.q_cnn(obs)
        obs = obs.view(-1, self.start_dim)
        q = self.q_mlp(torch.cat([obs, act], dim=-1))
        return torch.squeeze(q, -1)  # ensure q has the right shape: (n,) rather than (n, 1)
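# Example (illustrative sketch, not part of the original file):
#
#   critic = CNNCritic((128, 128, 3), act_dim=2, conv_layer_sizes=[(16, 2, 2), (32, 2, 2)],
#                      hidden_sizes=(256, 256), activation=nn.ReLU)
#   q = critic(torch.randn(4, 3, 128, 128), torch.randn(4, 2))   # -> shape (4,)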

class CNNActorCritic(nn.Module):
    def __init__(self, observation_space, action_space, conv_layer_sizes, hidden_sizes=(256, 256), activation=nn.ReLU, device='cpu'):
        '''
        A Convolutional Neural Net for the Actor-Critic network.
        Args:
            observation_space (gym.spaces): observation space of the environment
            action_space (gym.spaces): action space of the environment
            conv_layer_sizes (list): list of 3-tuples consisting of (output_channel, kernel_size, stride)
                that describes the cnn architecture
            hidden_sizes (tuple): number of neurons in each layer of the MLP
            activation (nn.modules.activation): activation function for each layer of the MLP
            device (str): whether to use cpu or gpu to run the model
        '''
        super().__init__()
        obs_dim = observation_space.shape
        act_dim = action_space.shape[0]
        act_limit = action_space.high[0]

        # Create Actor and Critic networks
        self.pi = CNNActor(obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation, act_limit).to(device)
        self.q = CNNCritic(obs_dim, act_dim, conv_layer_sizes, hidden_sizes, activation).to(device)

    def act(self, obs):
        # Compute an action without tracking gradients (used when interacting with the environment)
        with torch.no_grad():
            return self.pi(obs).cpu().numpy()
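# Example (illustrative sketch, not part of the original file; assumes a gym
# environment with an image observation space of shape (H, W, C) and a
# continuous action space — the env name is hypothetical):
#
#   import gym
#   env = gym.make('CarRacing-v0')
#   ac = CNNActorCritic(env.observation_space, env.action_space,
#                       conv_layer_sizes=[(16, 2, 2), (32, 2, 2)])
#   obs = torch.as_tensor(env.reset(), dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)
#   action = ac.act(obs)   # numpy array of shape (1, act_dim)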