GymHTTP/gymserver.py

140 lines
3.3 KiB
Python

import sys
import gym
from flask import Flask
from flask import request
import pickle
import json
# Make it so that it doesn't log every HTTP request
import logging
# Raise the threshold of the 'werkzeug' logger to ERROR so that records below
# that severity are no longer emitted by it.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)
##
# OpenAI Gym State
##
# environment_name = "Acrobot-v1"
# Id passed to gym.make(); also returned verbatim by the /environment route.
environment_name = "Pong-v0"
env = gym.make(environment_name)
# Observations to release to agent
# NOTE(review): assumes env.reset() returns only the observation and that
# env.step() returns a 4-tuple, as the /action route unpacks it — TODO confirm
# against the installed gym version.
state = env.reset()  # most recent observation
reward = 0  # reward returned by the most recent step
score = 0  # sum of the rewards received since the last reset
done = False  # "done" flag returned by the most recent step
info = {}  # "info" value returned by the most recent step
##
# Helper Functions
##
# [TODO] Evaluate whether pickling is the right option here
def pickle_state():
    """Serialize the most recent observation for transport to a client.

    Returns:
        bytes: ``pickle.dumps`` of the module-level ``state``.
    """
    # ``state`` is only read here, so no ``global`` declaration is required.
    serialized = pickle.dumps(state)
    return serialized
##
# Flask Environment
##
# Flask application object; the @app.route decorators in this file register
# their view functions on it.
app = Flask(__name__)
@app.route('/environment', methods=['GET'])
def get_env():
    """Return the environment name, or the dimensions of its spaces as JSON.

    Query parameters:
        shape: when present (with any value), respond with a JSON object
            holding ``observation`` (the observation space's ``shape``) and
            ``action`` (the action space's ``n`` when it has one, otherwise
            the action space's ``shape``).

    Returns:
        str: the JSON document described above when ``shape`` is given,
        otherwise the configured environment name.
    """
    if request.args.get('shape') is None:
        return environment_name

    action_space = env.action_space
    action_count = getattr(action_space, 'n', None)
    if action_count is not None:
        # Coerce to a built-in int so json.dumps accepts it regardless of the
        # integer type the space stores.
        action_dims = int(action_count)
    else:
        # A space without ``n`` previously raised AttributeError here and
        # turned the request into a server error; report its shape instead.
        action_dims = action_space.shape

    shape = {
        'observation': env.observation_space.shape,
        'action': action_dims,
    }
    return json.dumps(shape)
@app.route('/gym', methods=['GET'])
def get_extra_data():
    """Return a pickled dict of the environment properties the client requested.

    Each of the query parameters ``action_space``, ``observation_space``,
    ``reward_range``, ``metadata`` and ``action_meanings`` selects, when
    present (with any value), the entry of the same name. Parameters that are
    absent contribute nothing, so the dict may be empty.

    Returns:
        bytes: ``pickle.dumps`` of the assembled dict.
    """
    # Each value is wrapped in a callable so that an attribute is only touched
    # when the matching query parameter was actually supplied.
    providers = (
        ('action_space', lambda: env.action_space),
        ('observation_space', lambda: env.observation_space),
        ('reward_range', lambda: env.reward_range),
        ('metadata', lambda: env.metadata),
        ('action_meanings', lambda: env.unwrapped.get_action_meanings()),
    )
    query = request.args
    data = {
        key: fetch()
        for key, fetch in providers
        if query.get(key) is not None
    }
    return pickle.dumps(data)
@app.route('/action_space', methods=['GET'])
def get_action_space():
    """Return ``env.action_space`` serialized with pickle."""
    action_space = env.action_space
    return pickle.dumps(action_space)
@app.route('/observation_space', methods=['GET'])
def get_observation_space():
    """Return ``env.observation_space`` serialized with pickle."""
    observation_space = env.observation_space
    return pickle.dumps(observation_space)
@app.route('/reward_range', methods=['GET'])
def get_reward_range():
    """Return ``env.reward_range`` serialized with pickle."""
    reward_range = env.reward_range
    return pickle.dumps(reward_range)
@app.route('/metadata', methods=['GET'])
def get_metadata():
    """Return ``env.metadata`` serialized with pickle."""
    metadata = env.metadata
    return pickle.dumps(metadata)
@app.route('/action_meanings', methods=['GET'])
def get_action_meanings():
    """Return the result of ``env.unwrapped.get_action_meanings()``, pickled."""
    meanings = env.unwrapped.get_action_meanings()
    return pickle.dumps(meanings)
@app.route('/state', methods=['GET'])
def get_state():
    """Return the most recent observation as produced by ``pickle_state``."""
    payload = pickle_state()
    return payload
@app.route('/reward', methods=['GET'])
def get_reward():
    """Return a reward value as text.

    Query parameters:
        all: when present (with any value), return the accumulated ``score``
            instead of the ``reward`` of the most recent step.

    Returns:
        str: ``str()`` of the selected value.
    """
    wants_total = request.args.get('all') is not None
    return str(score if wants_total else reward)
@app.route('/done', methods=['GET'])
def is_done():
    """Return the module-level ``done`` flag converted with ``str()``."""
    finished_text = str(done)
    return finished_text
@app.route('/info', methods=['GET'])
def get_info():
    """Return the module-level ``info`` value encoded as JSON text.

    Unlike most other routes in this file, this one uses ``json.dumps``
    rather than pickle.
    """
    encoded = json.dumps(info)
    return encoded
@app.route('/action', methods=['POST'])
def perform_action():
    """Advance the environment by one step using the action posted by the client.

    Form fields:
        id: integer identifier of the action to take.

    Side effects:
        Rebinds the module-level ``state``, ``reward``, ``done`` and ``info``
        to the values returned by ``env.step`` and adds ``reward`` to
        ``score``. Nothing is modified when the request is rejected.

    Returns:
        bytes: a pickled dict with the keys ``state``, ``reward``, ``done``
        and ``info`` on success.
        tuple: ``(message, 400)`` when ``id`` is not an integer or is not
        accepted by ``env.action_space.contains``.
    """
    global state, reward, done, info, score

    # A missing 'id' field is left to the framework's handling of the failed
    # form lookup, exactly as before.
    raw_action = request.form['id']
    try:
        action = int(raw_action)
    except ValueError:
        # Previously this escaped as an unhandled exception (server error).
        return 'Action id must be an integer', 400

    # Reject actions the environment does not define before stepping, so a bad
    # request can neither crash the step nor disturb the running episode.
    if not env.action_space.contains(action):
        return 'Action id is not part of the action space', 400

    state, reward, done, info = env.step(action)
    score += reward

    content = {
        'state': state,
        'reward': reward,
        'done': done,
        'info': info,
    }
    return pickle.dumps(content)
@app.route('/reset')
def reset_env():
    """Start a new episode and return its first observation.

    Rebinds the module-level ``state`` to the result of ``env.reset()`` and
    clears ``reward``, ``score``, ``done`` and ``info`` back to their initial
    values.

    Returns:
        bytes: the new ``state`` as produced by ``pickle_state``.
    """
    global state, reward, done, info, score

    # Reset the environment first: if it fails, the bookkeeping below is left
    # as it was.
    state = env.reset()
    reward = score = 0
    done, info = False, {}
    return pickle_state()