void PythonInit(int argc, char *argv[]){
    Py_Initialize();
+   PyEval_InitThreads();
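    // PyEval_InitThreads() initialises the GIL so the embedded interpreter can be
    // used from more than one thread (e.g. one per worker environment); on CPython
    // 3.7+ Py_Initialize() already does this, so the call is harmless but redundant.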
    wchar_t **argw = new wchar_t*[argc];
    for (int i = 0; i < argc; i++) argw[i] = Py_DecodeLocale(argv[i], NULL);
    PySys_SetArgv(argc, argw);
}

-AgentInterfacePtr PPO_agent_with_param(GymEnvironmentPtr env, std::vector<int> actor_size, double actor_lr, \
+AgentInterfacePtr PPO_agent_with_param(std::vector<PytorchEnvironmentPtr> &env, std::vector<int> actor_size, double actor_lr, \
    std::vector<int> critic_size, double critic_lr, double critic_decay, double gamma, double lamda, int steps, int batch_size){
-   int o_size = env->observationSize;
-   int a_size = env->actionSize;
+   int o_size = env[0]->observationSize;
+   int a_size = env[0]->actionSize;
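    // Network dimensions are read from env[0]; every parallel worker is assumed to
    // wrap the same task, i.e. identical observation and action sizes.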
    actor_size.insert(actor_size.begin(), o_size); actor_size.insert(actor_size.end(), a_size);
    DeepNetworkPtr actor_network = DeepNetworkPtr(new DeepNetwork(actor_size));
    AdamPtr actor_opt = AdamPtr(new torch::optim::Adam(actor_network->parameters(), actor_lr));
@@ -29,10 +30,10 @@ AgentInterfacePtr PPO_agent_with_param(GymEnvironmentPtr env, std::vector<int> a
    return AgentInterfacePtr(new PPOAgent(env, actor, critic, state_modifier, gamma, lamda, steps, batch_size));
}

-void train(AgentInterfacePtr agent, int train_step, torch::Device device){
+void train(AgentInterfacePtr agent, int train_step, torch::Device device, bool render){
    mkdir("save_model", 0775);
    for (int i = 0; i < train_step; i++){
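        // render replaces the previously hard-coded true, so rollout visualisation
        // can be switched off for headless training runs.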
-       agent->train(5, device, true);
+       agent->train(5, device, render);
        printf("train fin\n");
        std::stringstream name;
        name << "./save_model/" << i;
@@ -45,7 +46,7 @@ void train(AgentInterfacePtr agent, int train_step, torch::Device device){
    }
}

-void demo(GymEnvironmentPtr env, AgentInterfacePtr agent, torch::Device device){
+void demo(PytorchEnvironmentPtr env, AgentInterfacePtr agent, torch::Device device){
    agent->to(device);
    torch::Tensor state = env->reset();
    while (1){
@@ -62,6 +63,8 @@ static std::string load_model;
static torch::Device device = torch::kCPU;
static int train_step = 0;
static std::string env_type;
+static int render = 0;
+static int cpu = 16;
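// cpu sets how many parallel Gym environments main() creates; render is forwarded
// to agent->train() and gates the final demo() run.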

void ParseArgs(int argc, char *argv[]){
    for (int i = 0; i < argc; i++){
@@ -80,16 +83,17 @@ int main(int argc, char *argv[])
    PythonInit(argc, argv);
    ParseArgs(argc, argv);

-   GymEnvironmentPtr env = GymEnvironmentPtr(new GymEnvironment(env_type.c_str(), device));
+   std::vector<PytorchEnvironmentPtr> env;
+   for (int i = 0; i < cpu; i++) env.push_back(PytorchEnvironmentPtr(new GymEnvironment(env_type.c_str(), device)));
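    // One environment per worker is built up front and the whole vector is handed to
    // the agent, presumably so PPO rollouts can be collected from several environments
    // per update instead of a single one.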
    AgentInterfacePtr agent = PPO_agent_with_param(env, {128, 128}, 1e-4, {128, 128}, 1e-4, 7e-4, 0.994, 0.99, 4096, 80);
    // AgentInterfacePtr agent = Vanila_agent_with_param(env, {128, 128}, 1e-4, {128, 128}, 1e-4, 7e-4, 0.994, 2048, 32);
    if (load_model != ""){
        tinyxml2::XMLDocument doc;
        if (doc.LoadFile(load_model.c_str())) return !printf("%s not exist\n", load_model.c_str());
        agent->set_xml(doc.RootElement());
    }
-   train(agent, train_step, device);
-   demo(env, agent, device);
+   train(agent, train_step, device, render);
+   if (render) demo(env[0], agent, device);

    if (Py_FinalizeEx() < 0) {
        return 120;