Initial Commit
This commit is contained in:
commit
c38ccbfe8e
4 changed files with 147 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
__pycache__
|
4
README.md
Normal file
4
README.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
# HTTP interface for OpenAI Gym
|
||||
This library adds an HTTP interface for the [OpenAI Gym Project](https://github.com/openai/gym). Hopefully you will be able to use it in your reinforcement learning projects without noticing!
|
||||
|
||||
Why would I want to do this? If you want to decouple the processing of the environment from the training of your models, this might be beneficial.
|
47
gymclient.py
Normal file
47
gymclient.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import pickle
|
||||
import numpy
|
||||
import requests
|
||||
|
||||
|
||||
# [TODO] Error handling for if server is down
|
||||
class Environment:
    """Client for a Gym environment that is exposed over HTTP.

    Every method issues one or more HTTP requests against ``self.server`` and
    decodes the reply. ``reset`` and ``step`` follow the Gym calling
    convention; the ``get_*`` helpers read individual values.

    NOTE(security): observations are decoded with ``pickle.loads``, which can
    execute arbitrary code while loading. Only connect to a server you trust.
    """

    def __init__(self, address, port, ssl=False):
        """Store the server location and build its base URL.

        Args:
            address: Host name or IP address of the server (a string).
            port: Port of the server; converted with ``str``.
            ssl: When true the base URL uses ``https://`` instead of
                ``http://``.
        """
        self.address = address
        self.port = port
        protocol = "https://" if ssl else "http://"
        self.server = protocol + address + ":" + str(port)

    ##
    # Helper Functions
    ##
    @staticmethod
    def _to_bool(value):
        """Interpret a flag sent either as a JSON boolean or as the text "True"."""
        if isinstance(value, str):
            return value == "True"
        return bool(value)

    def get_environment_name(self):
        """Return the body of ``GET /environment`` as text."""
        r = requests.get(self.server + "/environment")
        return r.text

    def get_state(self):
        """Return the object unpickled from the body of ``GET /state``."""
        r = requests.get(self.server + "/state")
        # NOTE(security): unpickling data received over the network.
        return pickle.loads(r.content)

    def get_reward(self):
        """Return the body of ``GET /reward`` converted to ``float``."""
        r = requests.get(self.server + "/reward")
        return float(r.text)

    def get_score(self):
        """Return the body of ``GET /reward?all=`` converted to ``float``."""
        r = requests.get(self.server + "/reward", params={'all': ''})
        return float(r.text)

    def get_done(self):
        """Return True when the body of ``GET /done`` is the text ``True``."""
        r = requests.get(self.server + "/done")
        return r.text == "True"

    def get_info(self):
        """Return the JSON-decoded body of ``GET /info``."""
        r = requests.get(self.server + "/info")
        return r.json()

    ##
    # Common API
    ##
    def reset(self):
        """Request ``/reset`` and return the object unpickled from the reply."""
        r = requests.get(self.server + "/reset")
        # NOTE(security): unpickling data received over the network.
        return pickle.loads(r.content)

    def step(self, action):
        """Post ``action`` to ``/action`` and return the outcome of the step.

        Args:
            action: Value sent as the ``id`` form field.

        Returns:
            A tuple ``(state, reward, done, info)``: the observation fetched
            with ``get_state`` after the action was posted, the reward as a
            ``float``, the done flag as a ``bool`` and the ``info`` value
            taken from the JSON reply.
        """
        r = requests.post(self.server + "/action", data={'id': action})
        content = r.json()
        # The reply is JSON, so ``done`` normally arrives as a real boolean;
        # comparing it with the string "True" would always yield False.
        return (
            self.get_state(),
            float(content['reward']),
            self._to_bool(content['done']),
            content['info'],
        )
|
||||
|
||||
# env = Environment("127.0.0.1", 5000)
|
95
gymserver.py
Normal file
95
gymserver.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
import sys
|
||||
import gym
|
||||
from flask import Flask
|
||||
from flask import request
|
||||
import pickle
|
||||
import json
|
||||
|
||||
|
||||
##
|
||||
# OpenAI Gym State
|
||||
##
|
||||
# Id of the Gym environment served by this process. It is fixed here;
# alternatives left by the author were sys.argv[1] and "Acrobot-v1".
environment_name = "Pong-v0"
env = gym.make(environment_name)

# Latest values handed out to the agent over HTTP.
state = env.reset()
reward = score = 0
done, info = False, {}
|
||||
|
||||
##
|
||||
# Helper Functions
|
||||
##
|
||||
# [TODO] Evaluate whether pickling is the right option here
|
||||
def pickle_state():
    """Return the module-level ``state`` serialized with ``pickle.dumps``."""
    serialized = pickle.dumps(state)
    return serialized
|
||||
|
||||
|
||||
|
||||
##
|
||||
# Flask Environment
|
||||
##
|
||||
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
|
||||
|
||||
@app.route('/environment', methods=['GET'])
def get_env():
    """Return the environment name, or its space sizes when ``shape`` is given.

    Without a ``shape`` query argument the reply is ``environment_name``.
    With it, the reply is a JSON object holding ``observation``
    (``env.observation_space.shape``) and ``action`` (``env.action_space.n``).
    """
    if request.args.get('shape') is None:
        return environment_name

    return json.dumps({
        'observation': env.observation_space.shape,
        'action': env.action_space.n,
    })
|
||||
|
||||
@app.route('/state', methods=['GET'])
def get_state():
    """Reply with the pickled bytes produced by ``pickle_state``."""
    body = pickle_state()
    return body
|
||||
|
||||
@app.route('/reward', methods=['GET'])
def get_reward():
    """Reply with ``reward`` as text, or with ``score`` when ``all`` is given.

    The presence of the ``all`` query argument (with any value) selects the
    module-level ``score``; otherwise the module-level ``reward`` is returned.
    """
    wants_total = request.args.get('all') is not None
    return str(score if wants_total else reward)
|
||||
|
||||
@app.route('/done', methods=['GET'])
def is_done():
    """Reply with the module-level ``done`` flag converted by ``str``."""
    finished = str(done)
    return finished
|
||||
|
||||
@app.route('/info', methods=['GET'])
def get_info():
    """Reply with the module-level ``info`` value encoded as JSON."""
    encoded = json.dumps(info)
    return encoded
|
||||
|
||||
@app.route('/action', methods=['POST'])
def perform_action():
    """Advance the environment by one step using the posted action id.

    Reads the form field ``id``, converts it to ``int`` and passes it to
    ``env.step``. The module-level ``state``, ``reward``, ``done`` and
    ``info`` are replaced by the step result and the reward is added to the
    running ``score``.

    Returns:
        A JSON object with the keys ``reward``, ``done`` and ``info``. When
        ``id`` is not an integer, or is not contained in
        ``env.action_space``, a JSON object with an ``error`` message and
        HTTP status 400 is returned instead and the environment is left
        untouched.
    """
    global state, reward, done, info, score

    raw_action = request.form['id']
    try:
        action = int(raw_action)
    except ValueError:
        # Report malformed input to the client instead of failing with a 500.
        return json.dumps({'error': 'action id must be an integer'}), 400

    # Refuse actions outside the action space before touching the environment.
    if not env.action_space.contains(action):
        return json.dumps(
            {'error': 'action id is not valid for this environment'}
        ), 400

    state, reward, done, info = env.step(action)
    score += reward

    return json.dumps({'reward': reward, 'done': done, 'info': info})
|
||||
|
||||
@app.route('/reset')
def reset_env():
    """Reset the environment and the cached values, then reply with the state.

    ``state`` is replaced by the result of ``env.reset()``; ``reward`` and
    ``score`` are set back to 0, ``done`` to False and ``info`` to an empty
    dict. The reply is the output of ``pickle_state``.
    """
    global state, reward, done, info, score

    state = env.reset()
    reward = score = 0
    done, info = False, {}

    return pickle_state()
|
||||
|
Loading…
Reference in a new issue