From c38ccbfe8e1eeb344a1d5492538ad54945a8bf34 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Wed, 5 Jun 2019 21:56:01 -0400 Subject: [PATCH] Initial Commit --- .gitignore | 1 + README.md | 4 +++ gymclient.py | 47 ++++++++++++++++++++++++++ gymserver.py | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 gymclient.py create mode 100644 gymserver.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/README.md b/README.md new file mode 100644 index 0000000..09f29d8 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# HTTP interface for OpenAI Gym +This library adds an HTTP interface for the [OpenAI Gym Project](https://github.com/openai/gym). Hopefully you will be able to use it in your reinforcement learning projects without noticing! + +Why would I want to do this? If you want to decouple the processing of the environment from the training of your models, this might be beneficial. 
# gymclient.py -- HTTP client for the Gym server.
import pickle

import numpy  # noqa: F401  (kept from the original module; not referenced directly)


# [TODO] Error handling for if server is down
class Environment:
    """Client-side proxy for a Gym environment served over HTTP.

    Every method issues one or more HTTP requests against the server
    located at ``<protocol><address>:<port>``.

    Example::

        env = Environment("127.0.0.1", 5000)

    Args:
        address: Host name or IP address of the server.
        port: TCP port of the server.
        ssl: Use ``https://`` instead of ``http://`` when true.
        timeout: Seconds handed to ``requests`` for each call.  ``None``
            (the default) waits indefinitely, which matches the behaviour
            before this parameter existed.
    """

    def __init__(self, address, port, ssl=False, timeout=None):
        self.address = address
        self.port = port
        self.timeout = timeout
        protocol = "https://" if ssl else "http://"
        self.server = protocol + address + ":" + str(port)

    ##
    # Internal helpers
    ##
    def _request(self, method, path, **kwargs):
        """Send one HTTP request to the server and return the response.

        Raises:
            requests.exceptions.RequestException: On connection failures,
                timeouts, or a 4xx/5xx status code.
        """
        # Local import keeps this block self-contained; `requests` is the
        # only third-party dependency of the client.
        import requests

        response = requests.request(
            method, self.server + path, timeout=self.timeout, **kwargs
        )
        # Fail loudly on error pages instead of trying to unpickle/parse them.
        response.raise_for_status()
        return response

    @staticmethod
    def _parse_done(value):
        """Normalise a ``done`` flag received from the server to ``bool``.

        The ``/action`` endpoint sends ``done`` inside a JSON document, so it
        arrives as a real boolean; the string form ``"True"``/``"False"`` is
        accepted as well.
        """
        if isinstance(value, str):
            return value == "True"
        return bool(value)

    ##
    # Helper Functions
    ##
    def get_environment_name(self):
        """Return the environment name reported by ``/environment``."""
        return self._request("GET", "/environment").text

    def get_state(self):
        """Return the current observation, unpickled from ``/state``."""
        r = self._request("GET", "/state")
        # NOTE(security): pickle.loads can execute arbitrary code embedded in
        # the payload -- only connect to servers you trust.
        return pickle.loads(r.content)

    def get_reward(self):
        """Return the most recent reward as a float."""
        return float(self._request("GET", "/reward").text)

    def get_score(self):
        """Return the accumulated reward (``/reward`` with the ``all`` flag)."""
        r = self._request("GET", "/reward", params={'all': ''})
        return float(r.text)

    def get_done(self):
        """Return whether the server reports the episode as finished."""
        return self._request("GET", "/done").text == "True"

    def get_info(self):
        """Return the info dictionary decoded from the JSON at ``/info``."""
        return self._request("GET", "/info").json()

    ##
    # Common API
    ##
    def reset(self):
        """Reset the remote environment and return the initial observation."""
        r = self._request("GET", "/reset")
        # NOTE(security): see get_state() -- the payload is unpickled.
        return pickle.loads(r.content)

    def step(self, action):
        """Perform ``action`` remotely.

        Returns:
            A ``(state, reward, done, info)`` tuple.  The state is fetched
            with a second request because ``/action`` does not include it.
        """
        content = self._request("POST", "/action", data={'id': action}).json()
        # Comparing the JSON boolean against the string "True" was always
        # False, so finished episodes were never reported.
        done = self._parse_done(content['done'])
        return self.get_state(), float(content['reward']), done, content['info']

# env = Environment("127.0.0.1", 5000)
# gymserver.py -- Flask server exposing a single Gym environment over HTTP.
import sys
import gym
from flask import Flask
from flask import request
import pickle
import json


##
# OpenAI Gym State
##
# environment_name = sys.argv[1]
# environment_name = "Acrobot-v1"
environment_name = "Pong-v0"
env = gym.make(environment_name)

# Observations to release to agent
state = env.reset()
reward = 0
score = 0
done = False
info = {}

##
# Helper Functions
##
# [TODO] Evaluate whether pickling is the right option here
def pickle_state():
    """Return the module-level ``state`` serialized with pickle."""
    global state
    return pickle.dumps(state)



##
# Flask Environment
##
app = Flask(__name__)

@app.route('/environment', methods=['GET'])
def get_env():
    """Return the environment name, or its shapes when ``?shape`` is given.

    With the ``shape`` query argument the response is a JSON object with
    ``observation`` (``env.observation_space.shape``) and ``action``
    (``env.action_space.n``).
    """
    global env, environment_name
    if request.args.get('shape') is not None:
        shape = {}
        shape['observation'] = env.observation_space.shape
        shape['action'] = env.action_space.n
        return json.dumps(shape)
    return environment_name

@app.route('/state', methods=['GET'])
def get_state():
    """Return the pickled current observation."""
    return pickle_state()

@app.route('/reward', methods=['GET'])
def get_reward():
    """Return the last reward, or the running score when ``?all`` is given."""
    global score, reward
    if request.args.get('all') is not None:
        return str(score)
    else:
        return str(reward)

@app.route('/done', methods=['GET'])
def is_done():
    """Return ``str(done)`` for the current episode."""
    global done
    return str(done)

@app.route('/info', methods=['GET'])
def get_info():
    """Return the last ``info`` dictionary as JSON."""
    global info
    return json.dumps(info)

@app.route('/action', methods=['POST'])
def perform_action():
    """Step the environment with the action given in form field ``id``.

    Returns a JSON object with ``reward``, ``done`` and ``info``.  Responds
    with HTTP 400 (and leaves the environment untouched) when ``id`` is not
    an integer or is not contained in ``env.action_space``.
    """
    global state, reward, done, info, score
    try:
        action = int(request.form['id'])
    except ValueError:
        # A non-numeric id used to surface as an unhandled 500 error.
        return "Action id must be an integer", 400

    # Reject actions the environment does not define before stepping it.
    if not env.action_space.contains(action):
        return "Invalid action: " + str(action), 400

    state, reward, done, info = env.step(action)
    score += reward

    content = {}
    content['reward'] = reward
    content['done'] = done
    content['info'] = info
    return json.dumps(content)

@app.route('/reset')
def reset_env():
    """Reset the environment and all cached values; return the pickled state."""
    global env, state, reward, done, info, score
    state = env.reset()
    reward = 0
    done = False
    info = {}
    score = 0
    return pickle_state()