This repository has been archived by the owner on Mar 17, 2019. It is now read-only.

Dqn refactor lidar deepq #151

Merged
merged 2 commits into from
May 11, 2018
Changes from 1 commit
Refactor lidar example DeepQ class into deepq.py
Matthew Hansen committed May 11, 2018
commit 124fafba08b617fdd80dd05fa341d5261157642d
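
For context, after this commit the example script builds its agent through the new module instead of the inline class. A minimal sketch of the resulting usage, assuming deepq.py sits alongside the script as the commit message indicates; the hyperparameter values below are illustrative, the real ones are defined in the example script:

import deepq

# Illustrative values only; see circuit2_turtlebot_lidar_dqn.py for the real ones.
network_inputs, network_outputs = 100, 21
memorySize, discountFactor, learningRate, learnStart = 1000000, 0.99, 0.00025, 128

deepQ = deepq.DeepQ(network_inputs, network_outputs, memorySize,
                    discountFactor, learningRate, learnStart)
deepQ.initNetworks([300, 300])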
242 changes: 4 additions & 238 deletions examples/turtlebot/circuit2_turtlebot_lidar_dqn.py
@@ -11,243 +11,8 @@
from distutils.dir_util import copy_tree
import os
import json
import random
import numpy as np
from keras.models import Sequential, load_model
from keras import optimizers
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.regularizers import l2
import memory
import liveplot

class DeepQ:
"""
DQN abstraction.

As a quick reminder:
traditional Q-learning:
Q(s, a) += alpha * (reward(s,a) + gamma * max(Q(s')) - Q(s,a))
DQN:
target = reward(s,a) + gamma * max(Q(s'))

"""
def __init__(self, inputs, outputs, memorySize, discountFactor, learningRate, learnStart):
"""
Parameters:
- inputs: input size
- outputs: output size
- memorySize: size of the memory that will store each state
- discountFactor: the discount factor (gamma)
- learningRate: learning rate
- learnStart: number of steps to take before learning starts. Set to 128
"""
self.input_size = inputs
self.output_size = outputs
self.memory = memory.Memory(memorySize)
self.discountFactor = discountFactor
self.learnStart = learnStart
self.learningRate = learningRate

def initNetworks(self, hiddenLayers):
model = self.createModel(self.input_size, self.output_size, hiddenLayers, "relu", self.learningRate)
self.model = model

targetModel = self.createModel(self.input_size, self.output_size, hiddenLayers, "relu", self.learningRate)
self.targetModel = targetModel

def createRegularizedModel(self, inputs, outputs, hiddenLayers, activationType, learningRate):
bias = True
dropout = 0
regularizationFactor = 0.01
model = Sequential()
if len(hiddenLayers) == 0:
model.add(Dense(self.output_size, input_shape=(self.input_size,), init='lecun_uniform', bias=bias))
model.add(Activation("linear"))
else :
if regularizationFactor > 0:
model.add(Dense(hiddenLayers[0], input_shape=(self.input_size,), init='lecun_uniform', W_regularizer=l2(regularizationFactor), bias=bias))
else:
model.add(Dense(hiddenLayers[0], input_shape=(self.input_size,), init='lecun_uniform', bias=bias))

if (activationType == "LeakyReLU") :
model.add(LeakyReLU(alpha=0.01))
else :
model.add(Activation(activationType))

for index in range(1, len(hiddenLayers)):
layerSize = hiddenLayers[index]
if regularizationFactor > 0:
model.add(Dense(layerSize, init='lecun_uniform', W_regularizer=l2(regularizationFactor), bias=bias))
else:
model.add(Dense(layerSize, init='lecun_uniform', bias=bias))
if (activationType == "LeakyReLU") :
model.add(LeakyReLU(alpha=0.01))
else :
model.add(Activation(activationType))
if dropout > 0:
model.add(Dropout(dropout))
model.add(Dense(self.output_size, init='lecun_uniform', bias=bias))
model.add(Activation("linear"))
optimizer = optimizers.RMSprop(lr=learningRate, rho=0.9, epsilon=1e-06)
model.compile(loss="mse", optimizer=optimizer)
model.summary()
return model

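# createModel builds a plain (unregularized) MLP: hiddenLayers lists the width
# of each hidden layer, so initNetworks([300, 300]) yields
# input -> 300 -> 300 -> output with the chosen hidden activation, a linear
# output layer, and RMSprop/MSE compilation.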
def createModel(self, inputs, outputs, hiddenLayers, activationType, learningRate):
model = Sequential()
if len(hiddenLayers) == 0:
model.add(Dense(self.output_size, input_shape=(self.input_size,), init='lecun_uniform'))
model.add(Activation("linear"))
else :
model.add(Dense(hiddenLayers[0], input_shape=(self.input_size,), init='lecun_uniform'))
if (activationType == "LeakyReLU") :
model.add(LeakyReLU(alpha=0.01))
else :
model.add(Activation(activationType))

for index in range(1, len(hiddenLayers)):
# print("adding layer "+str(index))
layerSize = hiddenLayers[index]
model.add(Dense(layerSize, init='lecun_uniform'))
if (activationType == "LeakyReLU") :
model.add(LeakyReLU(alpha=0.01))
else :
model.add(Activation(activationType))
model.add(Dense(self.output_size, init='lecun_uniform'))
model.add(Activation("linear"))
optimizer = optimizers.RMSprop(lr=learningRate, rho=0.9, epsilon=1e-06)
model.compile(loss="mse", optimizer=optimizer)
model.summary()
return model

def printNetwork(self):
i = 0
for layer in self.model.layers:
weights = layer.get_weights()
print("layer ",i,": ",weights)
i += 1


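# Copy weights layer by layer from `model` into `backup`; updateTargetNetwork
# uses this to sync the target network with the online network.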
def backupNetwork(self, model, backup):
weightMatrix = []
for layer in model.layers:
weights = layer.get_weights()
weightMatrix.append(weights)
i = 0
for layer in backup.layers:
weights = weightMatrix[i]
layer.set_weights(weights)
i += 1

def updateTargetNetwork(self):
self.backupNetwork(self.model, self.targetModel)

# predict Q values for all the actions
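# (the state arrives as a 1-D array; it is reshaped to (1, len(state)) because
# Keras predict expects a batch dimension)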
def getQValues(self, state):
predicted = self.model.predict(state.reshape(1,len(state)))
return predicted[0]

def getTargetQValues(self, state):
predicted = self.targetModel.predict(state.reshape(1,len(state)))
return predicted[0]

def getMaxQ(self, qValues):
return np.max(qValues)

def getMaxIndex(self, qValues):
return np.argmax(qValues)

# calculate the target function
def calculateTarget(self, qValuesNewState, reward, isFinal):
"""
target = reward(s,a) + gamma * max(Q(s')
"""
if isFinal:
return reward
else :
return reward + self.discountFactor * self.getMaxQ(qValuesNewState)
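# e.g. with illustrative numbers: discountFactor = 0.99, reward = 1.0 and
# max Q(s') = 2.0 give a non-terminal target of 1.0 + 0.99 * 2.0 = 2.98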

# epsilon-greedy action selection: explore with probability explorationRate, otherwise pick the action with the highest Q value
def selectAction(self, qValues, explorationRate):
rand = random.random()
if rand < explorationRate :
action = np.random.randint(0, self.output_size)
else :
action = self.getMaxIndex(qValues)
return action

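# Stochastic alternative to epsilon-greedy: shift all Q-values to be positive,
# raise them to the power `bias`, normalize into a cumulative distribution and
# sample an action index from it (a larger bias makes the choice greedier).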
def selectActionByProbability(self, qValues, bias):
qValueSum = 0
shiftBy = 0
for value in qValues:
if value + shiftBy < 0:
shiftBy = - (value + shiftBy)
shiftBy += 1e-06

for value in qValues:
qValueSum += (value + shiftBy) ** bias

probabilitySum = 0
qValueProbabilities = []
for value in qValues:
probability = ((value + shiftBy) ** bias) / float(qValueSum)
qValueProbabilities.append(probability + probabilitySum)
probabilitySum += probability
qValueProbabilities[len(qValueProbabilities) - 1] = 1

rand = random.random()
i = 0
for value in qValueProbabilities:
if (rand <= value):
return i
i += 1

def addMemory(self, state, action, reward, newState, isFinal):
self.memory.addMemory(state, action, reward, newState, isFinal)

def learnOnLastState(self):
if self.memory.getCurrentSize() >= 1:
return self.memory.getMemory(self.memory.getCurrentSize() - 1)

def learnOnMiniBatch(self, miniBatchSize, useTargetNetwork=True):
# Do not learn until we've got self.learnStart samples
if self.memory.getCurrentSize() > self.learnStart:
# sample a minibatch of miniBatchSize experiences from replay memory and train on it
miniBatch = self.memory.getMiniBatch(miniBatchSize)
X_batch = np.empty((0,self.input_size), dtype = np.float64)
Y_batch = np.empty((0,self.output_size), dtype = np.float64)
for sample in miniBatch:
isFinal = sample['isFinal']
state = sample['state']
action = sample['action']
reward = sample['reward']
newState = sample['newState']

qValues = self.getQValues(state)
if useTargetNetwork:
qValuesNewState = self.getTargetQValues(newState)
else :
qValuesNewState = self.getQValues(newState)
targetValue = self.calculateTarget(qValuesNewState, reward, isFinal)

X_batch = np.append(X_batch, np.array([state.copy()]), axis=0)
Y_sample = qValues.copy()
Y_sample[action] = targetValue
Y_batch = np.append(Y_batch, np.array([Y_sample]), axis=0)
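# terminal transitions get an extra training sample: the terminal newState is
# pushed toward [reward] for every action, since no future return follows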
if isFinal:
X_batch = np.append(X_batch, np.array([newState.copy()]), axis=0)
Y_batch = np.append(Y_batch, np.array([[reward]*self.output_size]), axis=0)
self.model.fit(X_batch, Y_batch, batch_size = len(miniBatch), epochs=1, verbose = 0)

def saveModel(self, path):
self.model.save(path)

def loadWeights(self, path):
self.model.set_weights(load_model(path).get_weights())
+import deepq

def detect_monitor_files(training_dir):
return [os.path.join(training_dir, f) for f in os.listdir(training_dir) if f.startswith('openaigym')]
@@ -294,7 +59,7 @@ def clear_monitor_files(training_dir):
network_structure = [300,300]
current_epoch = 0

-deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
+deepQ = deepq.DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
deepQ.initNetworks(network_structure)
else:
# Load weights, monitor info and parameter info.
@@ -315,8 +80,9 @@ def clear_monitor_files(training_dir):
network_structure = d.get('network_structure')
current_epoch = d.get('current_epoch')

-deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
+deepQ = deepq.DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
deepQ.initNetworks(network_structure)

deepQ.loadWeights(weights_path)

clear_monitor_files(outdir)