Initial Commit
This commit is contained in:
commit
c38ccbfe8e
4 changed files with 147 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
__pycache__
|
4
README.md
Normal file
4
README.md
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# HTTP interface for OpenAI Gym
|
||||||
|
This library adds an HTTP interface to the [OpenAI Gym Project](https://github.com/openai/gym). Hopefully you will be able to use it in your reinforcement learning projects without noticing the difference!
|
||||||
|
|
||||||
|
Why would I want to do this? If you want to decouple the processing of the environment from the training of your models, this might be beneficial.
|
47
gymclient.py
Normal file
47
gymclient.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import pickle
|
||||||
|
import numpy
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# [TODO] Handle errors when the server is down
|
||||||
|
class Environment:
    """HTTP client exposing a Gym-like ``reset``/``step`` interface.

    Every method issues one or more HTTP requests against the server whose
    base URL is built from ``address``, ``port`` and ``ssl`` in the
    constructor.

    NOTE(security): ``get_state`` and ``reset`` call ``pickle.loads`` on the
    response body. Unpickling can execute arbitrary code, so only point this
    client at a server you trust.
    """

    def __init__(self, address, port, ssl=False):
        """Store the connection details and build the base server URL.

        Args:
            address: Host name or IP address of the server.
            port: TCP port of the server.
            ssl: When true the URL uses ``https://``, otherwise ``http://``.
        """
        self.address = address
        self.port = port
        protocol = "https://" if ssl else "http://"
        self.server = protocol + address + ":" + str(port)

    @staticmethod
    def _as_bool(value):
        """Interpret a flag that may arrive as a JSON boolean or as text.

        The ``/done`` endpoint answers with the text ``"True"``/``"False"``,
        while the ``/action`` endpoint embeds the flag in a JSON document,
        where it is decoded as a real boolean. Both spellings are accepted.
        """
        if isinstance(value, str):
            return value == "True"
        return bool(value)

    ##
    # Helper Functions
    ##
    def get_environment_name(self):
        """Return the body of ``GET /environment`` as text."""
        r = requests.get(self.server + "/environment")
        return r.text

    def get_state(self):
        """Return the unpickled body of ``GET /state``."""
        r = requests.get(self.server + "/state")
        # See the class docstring: unpickling network data requires a trusted server.
        return pickle.loads(r.content)

    def get_reward(self):
        """Return the body of ``GET /reward`` converted to ``float``."""
        r = requests.get(self.server + "/reward")
        return float(r.text)

    def get_score(self):
        """Return the body of ``GET /reward?all=`` converted to ``float``."""
        r = requests.get(self.server + "/reward", params={'all': ''})
        return float(r.text)

    def get_done(self):
        """Return ``True`` when ``GET /done`` answers with the text ``"True"``."""
        r = requests.get(self.server + "/done")
        return r.text == "True"

    def get_info(self):
        """Return the JSON-decoded body of ``GET /info``."""
        r = requests.get(self.server + "/info")
        return r.json()

    ##
    # Common API
    ##
    def reset(self):
        """Request ``GET /reset`` and return the unpickled response body."""
        r = requests.get(self.server + "/reset")
        # See the class docstring: unpickling network data requires a trusted server.
        return pickle.loads(r.content)

    def step(self, action):
        """Post ``action`` to ``/action`` and return the step result.

        Args:
            action: Value sent as the ``id`` form field.

        Returns:
            A ``(state, reward, done, info)`` tuple. ``state`` is fetched
            with a separate ``GET /state`` request; the other three values
            come from the JSON body of the ``/action`` response.
        """
        r = requests.post(self.server + "/action", data={'id': action})
        content = r.json()
        # 'done' is decoded from JSON, so it is normally a real boolean here;
        # comparing it with the string "True" would always yield False.
        done = self._as_bool(content['done'])
        return self.get_state(), float(content['reward']), done, content['info']
|
||||||
|
|
||||||
|
# env = Environment("127.0.0.1", 5000)
|
95
gymserver.py
Normal file
95
gymserver.py
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
import sys
|
||||||
|
import gym
|
||||||
|
from flask import Flask
|
||||||
|
from flask import request
|
||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# OpenAI Gym State
|
||||||
|
##
|
||||||
|
# environment_name = sys.argv[1]
|
||||||
|
# environment_name = "Acrobot-v1"
|
||||||
|
# Name of the Gym environment hosted by this server, and the environment itself.
environment_name = "Pong-v0"
env = gym.make(environment_name)

# Values released to the agent. They live at module level so the route
# handlers can read them and overwrite them after each step or reset.
state = env.reset()
reward, score = 0, 0
done, info = False, {}
|
||||||
|
|
||||||
|
##
|
||||||
|
# Helper Functions
|
||||||
|
##
|
||||||
|
# [TODO] Evaluate whether pickling is the right option here
|
||||||
|
def pickle_state():
    """Return the module-level ``state`` serialized with ``pickle.dumps``."""
    return pickle.dumps(state)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Flask Environment
|
||||||
|
##
|
||||||
|
# Flask application object; the @app.route decorators below register the HTTP endpoints on it.
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/environment', methods=['GET'])
def get_env():
    """Return the environment name, or its space description on request.

    When the query string carries a ``shape`` parameter (any value), the
    response is a JSON object holding the observation space's ``shape``
    under 'observation' and the action space's ``n`` under 'action'.
    Otherwise the response is the environment name.
    """
    wants_shape = request.args.get('shape') is not None
    if not wants_shape:
        return environment_name

    return json.dumps({
        'observation': env.observation_space.shape,
        'action': env.action_space.n,
    })
|
||||||
|
|
||||||
|
@app.route('/state', methods=['GET'])
def get_state():
    """Return the current state as produced by ``pickle_state()``."""
    payload = pickle_state()
    return payload
|
||||||
|
|
||||||
|
@app.route('/reward', methods=['GET'])
def get_reward():
    """Return the latest reward, or the accumulated score, as text.

    The accumulated ``score`` is returned when the query string carries an
    ``all`` parameter (any value); otherwise the latest ``reward`` is.
    """
    wants_total = request.args.get('all') is not None
    return str(score if wants_total else reward)
|
||||||
|
|
||||||
|
@app.route('/done', methods=['GET'])
def is_done():
    """Return the module-level ``done`` flag converted to text with ``str``."""
    finished = str(done)
    return finished
|
||||||
|
|
||||||
|
@app.route('/info', methods=['GET'])
def get_info():
    """Return the module-level ``info`` value encoded as a JSON document."""
    serialized = json.dumps(info)
    return serialized
|
||||||
|
|
||||||
|
@app.route('/action', methods=['POST'])
def perform_action():
    """Advance the environment by one step using the posted action.

    Expects a form field ``id`` holding the integer action. The module-level
    ``state``, ``reward``, ``done`` and ``info`` are replaced with the result
    of ``env.step`` and the reward is added to ``score``.

    Returns:
        A JSON object with the keys 'reward', 'done' and 'info'. If ``id``
        is not an integer, or the action is rejected by
        ``env.action_space.contains``, the response is a JSON object with
        an 'error' key and HTTP status 400; the environment is not stepped.
    """
    global state, reward, done, info, score

    try:
        action = int(request.form['id'])
    except ValueError:
        return json.dumps({'error': "action id must be an integer"}), 400

    # Reject actions outside the action space before they reach env.step,
    # so a bad request cannot fail halfway through a step.
    if not env.action_space.contains(action):
        return json.dumps({'error': "action is not valid for this environment"}), 400

    state, reward, done, info = env.step(action)
    score += reward

    return json.dumps({
        'reward': reward,
        # bool() guarantees the JSON encoder receives a builtin boolean.
        'done': bool(done),
        'info': info,
    })
|
||||||
|
|
||||||
|
@app.route('/reset')
def reset_env():
    """Reset the environment and the released values, then return the state.

    ``state`` is replaced with the result of ``env.reset()``; ``reward`` and
    ``score`` return to 0, ``done`` to False and ``info`` to an empty dict.
    The response is the new state as produced by ``pickle_state()``.
    """
    global state, reward, done, info, score

    state = env.reset()
    reward, score = 0, 0
    done, info = False, {}

    return pickle_state()
|
||||||
|
|
Loading…
Reference in a new issue