Initial Commit

This commit is contained in:
Brandon Rozek 2019-06-05 21:56:01 -04:00
commit c38ccbfe8e
4 changed files with 147 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
__pycache__

4
README.md Normal file
View file

@ -0,0 +1,4 @@
# HTTP interface for OpenAI Gym
This library adds an HTTP interface for the [OpenAI Gym Project](https://github.com/openai/gym). Hopefully you will be able to use it in your reinforcement learning projects without noticing any difference from a local environment!
Why would I want to do this? If you want to decouple the processing of the environment from the training of your models, this might be beneficial.

47
gymclient.py Normal file
View file

@ -0,0 +1,47 @@
import pickle
import numpy
import requests
# [TODO] Error handling for if server is down
class Environment:
    """Client for a Gym environment that is served over HTTP.

    The class mirrors the two core Gym calls (``reset`` and ``step``) and adds
    a few helpers that query individual pieces of the server's latest
    transition (state, reward, score, done flag, info).

    Every call raises ``requests.HTTPError`` when the server answers with an
    error status, instead of trying to decode an error page as data.

    NOTE(security): observations are decoded with ``pickle.loads``. Unpickling
    can execute arbitrary code, so only connect to servers you trust.
    """

    def __init__(self, address, port, ssl = False):
        """Remember where the server lives.

        address -- host name or IP address of the server (string)
        port    -- TCP port the server listens on
        ssl     -- use ``https://`` instead of ``http://`` when true
        """
        self.address = address
        self.port = port
        protocol = "https://" if ssl else "http://"
        self.server = protocol + address + ":" + str(port)

    ##
    # Internal Helpers
    ##
    def _request(self, method, path, **kwargs):
        """Send one HTTP request to the server and return the response.

        Raises ``requests.HTTPError`` for 4xx/5xx answers so callers never
        parse an error body as if it were a valid payload.
        """
        r = requests.request(method, self.server + path, **kwargs)
        r.raise_for_status()
        return r

    @staticmethod
    def _as_bool(value):
        """Interpret a 'done' flag sent either as JSON boolean or as text.

        The ``/action`` endpoint encodes the flag with JSON (a real boolean
        after decoding), while ``/done`` sends the text ``"True"``/``"False"``.
        """
        if isinstance(value, str):
            return value.strip().lower() == "true"
        return bool(value)

    ##
    # Helper Functions
    ##
    def get_environment_name(self):
        """Return the name of the environment the server is running."""
        return self._request("GET", "/environment").text

    def get_state(self):
        """Return the server's current observation (unpickled)."""
        r = self._request("GET", "/state")
        # See the class docstring: only safe against a trusted server.
        return pickle.loads(r.content)

    def get_reward(self):
        """Return the reward of the most recent step as a float."""
        return float(self._request("GET", "/reward").text)

    def get_score(self):
        """Return the reward accumulated since the last reset as a float."""
        r = self._request("GET", "/reward", params = {'all':''})
        return float(r.text)

    def get_done(self):
        """Return True when the server reports the episode as finished."""
        return self._as_bool(self._request("GET", "/done").text)

    def get_info(self):
        """Return the info dictionary of the most recent step."""
        return self._request("GET", "/info").json()

    ##
    # Common API
    ##
    def reset(self):
        """Reset the remote environment and return the initial observation."""
        r = self._request("GET", "/reset")
        # See the class docstring: only safe against a trusted server.
        return pickle.loads(r.content)

    def step(self, action):
        """Perform ``action`` remotely.

        Returns the tuple ``(state, reward, done, info)``. The state is
        fetched with a second request because the ``/action`` response only
        carries reward, done and info.
        """
        r = self._request("POST", "/action", data={'id': action})
        content = r.json()
        return (
            self.get_state(),
            float(content['reward']),
            # The server sends a JSON boolean here; comparing it to the
            # string "True" would always be False.
            self._as_bool(content['done']),
            content['info'],
        )
# env = Environment("127.0.0.1", 5000)

95
gymserver.py Normal file
View file

@ -0,0 +1,95 @@
import sys
import gym
from flask import Flask
from flask import request
import pickle
import json
##
# OpenAI Gym State
##
# environment_name = sys.argv[1]
# environment_name = "Acrobot-v1"
environment_name = "Pong-v0"
env = gym.make(environment_name)

# Data from the latest transition; the HTTP endpoints serve these values.
state = env.reset()     # initial observation
reward = score = 0      # last-step reward and running total
done, info = False, {}  # episode-finished flag and extra step details
##
# Helper Functions
##
# [TODO] Evaluate whether pickling is the right option here
def pickle_state():
    """Return the current observation serialized with pickle."""
    # Read-only access to the module-level `state`; no `global` needed.
    serialized = pickle.dumps(state)
    return serialized
##
# Flask Environment
##
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
@app.route('/environment', methods=['GET'])
def get_env():
    """Return the environment name, or its space sizes when ``?shape`` is set.

    With the ``shape`` query parameter present the response is a JSON object
    with two keys:

    observation -- shape of the observation space
    action      -- number of actions for spaces that define ``n``; otherwise
                   the shape of the action space (spaces without ``n``
                   previously caused an AttributeError / HTTP 500)
    """
    if request.args.get('shape') is None:
        return environment_name

    action_space = env.action_space
    action_count = getattr(action_space, 'n', None)
    shape = {
        'observation': env.observation_space.shape,
        # int() keeps the value JSON-serializable even if `n` is a numpy int.
        'action': int(action_count) if action_count is not None
                  else getattr(action_space, 'shape', None),
    }
    return json.dumps(shape)
@app.route('/state', methods=['GET'])
def get_state():
    """Serve the current observation as a pickled byte string."""
    return pickle.dumps(state)
@app.route('/reward', methods=['GET'])
def get_reward():
    """Serve the last step's reward, or the running score when ``?all`` is set."""
    wants_total = request.args.get('all') is not None
    return str(score if wants_total else reward)
@app.route('/done', methods=['GET'])
def is_done():
    """Serve the episode-finished flag as text."""
    finished = done
    return str(finished)
@app.route('/info', methods=['GET'])
def get_info():
    """Serve the info dictionary of the most recent step as JSON."""
    payload = json.dumps(info)
    return payload
@app.route('/action', methods=['POST'])
def perform_action():
    """Step the environment with the action given in the ``id`` form field.

    On success the response is a JSON object with the keys ``reward``,
    ``done`` and ``info`` for the step just taken; the module-level
    transition state and running score are updated as a side effect.

    Responds with HTTP 400 and a JSON ``error`` message when ``id`` is not an
    integer or is not a member of the environment's action space. A missing
    ``id`` field is rejected by Flask itself with HTTP 400.
    """
    global state, reward, done, info, score
    try:
        action = int(request.form['id'])
    except ValueError:
        return json.dumps({'error': "action 'id' must be an integer"}), 400
    # Reject actions the environment does not define instead of letting
    # env.step fail with an internal server error.
    if not env.action_space.contains(action):
        return json.dumps({'error': 'invalid action: {}'.format(action)}), 400

    state, reward, done, info = env.step(action)
    score += reward
    content = {
        # Coerce to built-in types: environments may return numpy scalars,
        # which json.dumps cannot always serialize.
        'reward': float(reward),
        'done': bool(done),
        'info': info,
    }
    return json.dumps(content)
@app.route('/reset')
def reset_env():
    """Start a new episode and serve the initial observation (pickled)."""
    global state, reward, done, info, score
    state = env.reset()
    # Clear everything recorded for the previous episode.
    reward, score = 0, 0
    done, info = False, {}
    return pickle_state()