140 lines
3.3 KiB
Python
140 lines
3.3 KiB
Python
import sys
|
|
import gym
|
|
from flask import Flask
|
|
from flask import request
|
|
import pickle
|
|
import json
|
|
|
|
# Quiet the 'werkzeug' logger: with its threshold at ERROR the server no
# longer prints a line for every HTTP request it handles.
import logging

log = logging.getLogger('werkzeug')
log.setLevel(level=logging.ERROR)
|
|
|
|
##
|
|
# OpenAI Gym State
|
|
##
|
|
# Gym id of the environment this server exposes. Swap in another id
# (e.g. "Acrobot-v1") to serve a different task.
environment_name = "Pong-v0"
env = gym.make(environment_name)

# Episode bookkeeping released to the agent through the HTTP endpoints:
#   state  - latest observation (starts as the result of env.reset())
#   reward - reward from the most recent step
#   score  - running sum of rewards since the last reset
#   done   - whether the latest step reported the episode as finished
#   info   - auxiliary dict from the latest step
state = env.reset()
reward, score = 0, 0
done, info = False, {}
|
|
|
|
##
|
|
# Helper Functions
|
|
##
|
|
# [TODO] Evaluate whether pickling is the right option here
|
|
def pickle_state():
    """Return the module-level ``state`` serialized with ``pickle.dumps``.

    NOTE: consumers have to unpickle the result, so they should only talk
    to a server they trust.
    """
    # Read-only access to the global, so no ``global`` declaration is needed.
    serialized = pickle.dumps(state)
    return serialized
|
|
|
|
|
|
|
|
##
|
|
# Flask Environment
|
|
##
|
|
# Flask application object; the @app.route decorators below register each
# HTTP endpoint on it.
app = Flask(__name__)
|
|
|
|
@app.route('/environment', methods=['GET'])
def get_env():
    """Describe the environment being served.

    Without a ``shape`` query parameter the response is the environment
    name. With it, the response is a JSON object holding
    ``env.observation_space.shape`` under ``'observation'`` and
    ``env.action_space.n`` under ``'action'``.
    """
    # Plain request: just report which environment is loaded.
    if request.args.get('shape') is None:
        return environment_name

    dims = {
        'observation': env.observation_space.shape,
        'action': env.action_space.n,
    }
    return json.dumps(dims)
|
|
|
|
@app.route('/gym', methods=['GET'])
def get_extra_data():
    """Return several environment attributes in one pickled dict.

    Each of the query parameters ``action_space``, ``observation_space``,
    ``reward_range``, ``metadata`` and ``action_meanings`` that is present
    on the request adds the matching entry to the result. Absent
    parameters are skipped, so a bare request yields a pickled empty dict.
    """
    # Getters are wrapped in lambdas so an attribute is only touched when
    # the caller actually asked for it.
    # NOTE(review): get_action_meanings() is presumably not offered by
    # every environment - confirm before serving non-Atari tasks.
    providers = (
        ('action_space', lambda: env.action_space),
        ('observation_space', lambda: env.observation_space),
        ('reward_range', lambda: env.reward_range),
        ('metadata', lambda: env.metadata),
        ('action_meanings', lambda: env.unwrapped.get_action_meanings()),
    )
    query = request.args
    payload = {
        key: fetch()
        for key, fetch in providers
        if query.get(key) is not None
    }
    return pickle.dumps(payload)
|
|
|
|
@app.route('/action_space', methods=['GET'])
def get_action_space():
    """Return ``env.action_space`` serialized with pickle."""
    space = env.action_space
    return pickle.dumps(space)
|
|
|
|
@app.route('/observation_space', methods=['GET'])
def get_observation_space():
    """Return ``env.observation_space`` serialized with pickle."""
    space = env.observation_space
    return pickle.dumps(space)
|
|
|
|
@app.route('/reward_range', methods=['GET'])
def get_reward_range():
    """Return ``env.reward_range`` serialized with pickle."""
    bounds = env.reward_range
    return pickle.dumps(bounds)
|
|
|
|
@app.route('/metadata', methods=['GET'])
def get_metadata():
    """Return ``env.metadata`` serialized with pickle."""
    meta = env.metadata
    return pickle.dumps(meta)
|
|
|
|
@app.route('/action_meanings', methods=['GET'])
def get_action_meanings():
    """Return the pickled result of ``env.unwrapped.get_action_meanings()``."""
    meanings = env.unwrapped.get_action_meanings()
    return pickle.dumps(meanings)
|
|
|
|
@app.route('/state', methods=['GET'])
def get_state():
    """Return the current module-level ``state``, serialized with pickle."""
    # Same serialization the pickle_state() helper performs, written inline.
    return pickle.dumps(state)
|
|
|
|
@app.route('/reward', methods=['GET'])
def get_reward():
    """Return a reward value as text.

    With an ``all`` query parameter the accumulated ``score`` is returned;
    otherwise the ``reward`` of the most recent step.
    """
    wants_total = request.args.get('all') is not None
    return str(score if wants_total else reward)
|
|
|
|
@app.route('/done', methods=['GET'])
def is_done():
    """Return the module-level ``done`` flag rendered with ``str()``."""
    finished = str(done)
    return finished
|
|
|
|
@app.route('/info', methods=['GET'])
def get_info():
    """Return the module-level ``info`` dict encoded as JSON."""
    encoded = json.dumps(info)
    return encoded
|
|
|
|
@app.route('/action', methods=['POST'])
def perform_action():
    """Advance the environment by one step using the posted action.

    Expects a form field ``id`` holding the integer action. On success the
    module-level ``state``, ``reward``, ``done`` and ``info`` are replaced
    with the values returned by ``env.step``, ``score`` grows by the step
    reward, and the response is a pickled dict with the keys ``'state'``,
    ``'reward'``, ``'done'`` and ``'info'``.

    A missing or non-integer ``id``, or an action rejected by
    ``env.action_space.contains``, yields an HTTP 400 response and leaves
    the environment untouched.
    """
    global state, reward, done, info, score

    # int(None) raises TypeError and int('abc') raises ValueError; both are
    # client mistakes and are reported as 400 rather than a server error.
    raw_id = request.form.get('id')
    try:
        action = int(raw_id)
    except (TypeError, ValueError):
        return "Form field 'id' must be an integer action", 400

    # Reject actions outside the action space before they reach env.step().
    # NOTE(review): assumes a discrete action space, as the int() parsing
    # above already implies - confirm if continuous tasks are ever served.
    if not env.action_space.contains(action):
        return "Action is not valid for this environment", 400

    state, reward, done, info = env.step(action)
    score += reward

    content = {
        'state': state,
        'reward': reward,
        'done': done,
        'info': info,
    }
    return pickle.dumps(content)
|
|
|
|
@app.route('/reset')
def reset_env():
    """Start a fresh episode and return its first observation, pickled.

    ``state`` is replaced by the result of ``env.reset()``; ``reward`` and
    ``score`` go back to 0, ``done`` to False and ``info`` to an empty
    dict.
    """
    # env is only used, never rebound, so it needs no global declaration.
    global state, reward, done, info, score

    state = env.reset()
    reward, score = 0, 0
    done, info = False, {}
    return pickle_state()
|
|
|