diff --git a/environment.yml b/environment.yml index b7e7337..0af3a93 100644 --- a/environment.yml +++ b/environment.yml @@ -3,16 +3,17 @@ channels: - cogsci - defaults dependencies: - - python=3.6 - - pandas - - ipykernel - - nb_conda - - jupyter - - matplotlib - - scikit-learn - - scikit-image - - tensorflow - - keras + - python=3.6.8 + - pandas=0.23.4 + - ipykernel=5.1.0 + - nb_conda=2.2.1 + - jupyter=1.0.0 + - matplotlib=3.0.2 + - scikit-learn=0.20.2 + - scikit-image=0.14.1 + - keras=2.2.4 - pip: - - pygame - - PyGeodesy \ No newline at end of file + - pygame==1.9.4 + - PyGeodesy==18.10.29 + - gym==0.10.9 + - keras-rl==0.4.2 \ No newline at end of file diff --git a/fonts/B612-Regular.ttf b/fonts/B612-Regular.ttf new file mode 100755 index 0000000..c685a23 Binary files /dev/null and b/fonts/B612-Regular.ttf differ diff --git a/race.py b/race.py index 1099088..b372202 100644 --- a/race.py +++ b/race.py @@ -6,8 +6,9 @@ # import project code from environment import Environment from polar import Polar -from race_simulator import RaceSimulator +from simulators.race_simulator import RaceSimulator from boat import SimBoat +from settings import Settings # import configuration if os.path.isfile('config.py'): @@ -19,22 +20,13 @@ # load polar polar = Polar(os.path.join('data', 'polars', 'first-27.csv')) -# create some buoys -scale = 0.05 -buoys = [ - (52.3721693, 5.0750607), - (52.3721693 + 0.01 * scale, 5.0750607), - (52.3721693 + 0.008 * scale, 5.0750607 + 0.005 * scale), - (52.3721693 + 0.002 * scale, 5.0750607 + 0.005 * scale), -] - # create environment -env = Environment(buoys=buoys) +env = Environment(buoys=Settings.BUOYS) # instantiate all strategies strategies = [] for strategy in strategy_list: - boat = SimBoat(env, polar=polar, random_color=True).set_waypoint(1) + boat = SimBoat(env, polar=polar, random_color=True, name=strategy.__name__).set_waypoint(1) strategies.append(strategy(boat, env)) # start the simulator diff --git a/src/boat.py b/src/boat.py index 
7780276..910da3b 100644 --- a/src/boat.py +++ b/src/boat.py @@ -28,7 +28,9 @@ class Boat: # distance in meters from waypoint to skip to next waypoint DIST_NEXT_WAYPOINT = 2 - def __init__(self, env, random_color=False, name='no-name'): + def __init__(self, env, random_color=False, name='no-name', keep_log=True): + self._keep_log = keep_log + self._name = name self.rudder_angle = 0. self.target_rudder_angle = 0. self.boat_angle = 0. @@ -40,11 +42,7 @@ def __init__(self, env, random_color=False, name='no-name'): self._env = env self.history = pd.DataFrame() self.windspeed_shuffle = True - self._name = name - self._position = (52.3721693, 5.0750607) self._waypoint = None - self._bearing = 0. - self._distance = 0. self._marks_passed = 0 # set a boat color @@ -57,6 +55,19 @@ def __init__(self, env, random_color=False, name='no-name'): self._draw_fps = Settings.DRAW_FPS self._strategy = None + self._position = None + + # set initial position of boat + self.reset_boat_position() + + def reset_boat_position(self): + self._position = (52.3721693, 5.0750607) + + def set_heading(self, heading): + self.boat_angle = heading + + def get_heading(self): + return self.boat_angle def set_strategy(self, strategy): self._strategy = strategy @@ -97,6 +108,10 @@ def calculate_speed(self): speed += 1 return speed + def reset_rudder(self): + self.target_rudder_angle = 0 + self.rudder_angle = 0 + def move(self): """Simulates or fetches movement of boat""" @@ -138,22 +153,31 @@ def update(self): # simulate or fetch boat movements self.move() - # run navigation - self.nav() + # run navigation when steering strategy is active + if self._strategy is not None: + self.nav() + + # skip to next waypoint if we're there + if self._waypoint is not None: + if self.get_distance_to_waypoint() < self.DIST_NEXT_WAYPOINT: + logging.info("hit waypoint") + self._marks_passed += 1 + self._waypoint = self._waypoint + 1 if self._waypoint < len(self._env.get_buoys()) - 1 else 0 # save history - self.history = 
self.history.append([{ - 'datetime': dt.now(), - 'boat_angle': self.boat_angle + np.random.normal(0, 1), - 'boat_heel': self.boat_heel if np.random.uniform(0, 1) < 0.99 else np.nan, - 'boat_speed': self.speed + np.random.normal(0, 0.25), - 'target_angle': self.target_angle if np.random.uniform(0, 1) < 0.99 else np.nan, - 'course_error': self.get_course_error(), - 'rudder_angle': self.rudder_angle, - 'wind_direction': self._env.wind_direction, - 'wind_speed': self._env.wind_speed if np.random.uniform(0, 1) < 0.99 else np.random.randint(100, 150), - 'angle_of_attack': self.get_angle_of_attack() - }]) + if self._keep_log: + self.history = self.history.append([{ + 'datetime': dt.now(), + 'boat_angle': self.boat_angle + np.random.normal(0, 1), + 'boat_heel': self.boat_heel if np.random.uniform(0, 1) < 0.99 else np.nan, + 'boat_speed': self.speed + np.random.normal(0, 0.25), + 'target_angle': self.target_angle if np.random.uniform(0, 1) < 0.99 else np.nan, + 'course_error': self.get_course_error(), + 'rudder_angle': self.rudder_angle, + 'wind_direction': self._env.wind_direction, + 'wind_speed': self._env.wind_speed if np.random.uniform(0, 1) < 0.99 else np.random.randint(100, 150), + 'angle_of_attack': self.get_angle_of_attack() + }]) def nav(self): """ Update navigation variables and determine new course """ @@ -161,19 +185,11 @@ def nav(self): if self._waypoint is None: return - buoys = self._env.get_buoys() - - # get target position from waypoint - target_pos = buoys[self._waypoint] - # determine bearing to waypoint - self._bearing = geo.bearing(self._position[0], self._position[1], target_pos[0], target_pos[1]) - - # distance to waypoint - self._distance = geo.haversine(self._position[0], self._position[1], target_pos[0], target_pos[1]) + bearing = self.get_bearing_to_waypoint() # calculate new true wind angle to steer - new_twa = calc_angle(self._env.wind_direction, self._bearing) + new_twa = calc_angle(self._env.wind_direction, bearing) # get angles for optimal 
vmg upwind_twa = self._strategy.get_upwind_twa() @@ -189,12 +205,7 @@ def nav(self): # otherwise, steer directly to waypoint else: - self.set_target_angle(self._bearing) - - # skip to next waypoint if we're there - if self._distance < self.DIST_NEXT_WAYPOINT: - self._marks_passed += 1 - self._waypoint = self._waypoint + 1 if self._waypoint < len(buoys)-1 else 0 + self.set_target_angle(bearing) def set_twa(self, twa, tack=False): """ steer a true wind angle on the current (target) tack """ @@ -220,7 +231,10 @@ def get_position(self): return self._position def set_waypoint(self, waypoint): + """set new waypoint, also resets number of marks passed""" + self._waypoint = waypoint + self._marks_passed = 0 return self def get_boat_color(self): @@ -230,7 +244,14 @@ def get_marks_passed(self): return self._marks_passed def get_distance_to_waypoint(self): - return self._distance + buoys = self._env.get_buoys() + target_pos = buoys[self._waypoint] + return geo.haversine(self._position[0], self._position[1], target_pos[0], target_pos[1]) + + def get_bearing_to_waypoint(self): + buoys = self._env.get_buoys() + target_pos = buoys[self._waypoint] + return geo.bearing(self._position[0], self._position[1], target_pos[0], target_pos[1]) def get_name(self): return self._name diff --git a/src/drawers/race_drawer.py b/src/drawers/race_drawer.py index 1e8fc5a..edddd12 100644 --- a/src/drawers/race_drawer.py +++ b/src/drawers/race_drawer.py @@ -1,5 +1,9 @@ import pygame +import pandas as pd +import os.path + from tools import rotate_point +from environment import Environment class RaceDrawer: @@ -9,7 +13,7 @@ class RaceDrawer: RACE_CANVAS_COLOR = (33, 66, 99) ARROW_SHAPE = [(0, 100), (0, 200), (200, 200), (200, 300), (300, 150), (200, 0), (200, 100)] - ARROW_COLOR = (0, 255, 0) + ARROW_COLOR = (9, 209, 97) ARROW_POS = [350, 250] ARROW_ORIGIN = [150, 100] ARROW_SCALE = 0.2 @@ -19,11 +23,26 @@ class RaceDrawer: BOAT_COLOR = (255, 255, 255) BOAT_SCALE = 0.1 - def __init__(self, screen): - 
self._screen = screen + SIZE = 1100, 730 + BG_COLOR = 0, 0, 0 + TEXT_COLOR = 255, 255, 255 + + def __init__(self, boats: list, env: Environment): + self._boats = boats + self._env = env self._offset = (0, 0) self._scale = 0 + pygame.init() + pygame.font.init() + + self._font = pygame.font.Font(os.path.join('fonts', 'B612-Regular.ttf'), 20) + self._smallfont = pygame.font.Font(os.path.join('fonts', 'B612-Regular.ttf'), 15) + self._screen = pygame.display.set_mode(self.SIZE) + + # scale the race canvas + self.autoscale(self._env.get_buoys()) + def autoscale(self, buoys): """ Scale race canvas to fit all buoys """ lat, lon = zip(*buoys) @@ -98,4 +117,38 @@ def draw_buoys(self, buoys): x, y = self.translate_pos(position) pygame.draw.circle(self._screen, self.BUOY_COLOR, (x, y), 5) - + def write_text(self, text, row, color=(255, 255, 255)): + pos = 740, 30 + (row * 30) + textsurface = self._font.render(text, True, color) + self._screen.blit(textsurface, pos) + + def draw(self): + self._screen.fill(self.BG_COLOR) + self.draw_env(self._env) + self.draw_buoys(self._env.get_buoys()) + + # draw all boats + scoreboard = [] + for boat in self._boats: + + # update boat and draw + self.draw_boat(boat) + + # update scoreboard + scoreboard.append({ + 'name': boat.get_name(), + 'color': boat.get_boat_color(), + 'marks_passed': boat.get_marks_passed(), + 'dtw': boat.get_distance_to_waypoint() + }) + + # show scoreboard + scoreboard = pd.DataFrame(scoreboard).sort_values(by=['marks_passed', 'dtw'], ascending=[False, True]) + i = 0 + for _, row in scoreboard.iterrows(): + text = "%s (DTW: %dm)" % (row['name'], row.dtw) + self.write_text(text, i, row.color) + i += 1 + + # display new frame + pygame.display.flip() diff --git a/src/drawers/sim_drawer.py b/src/drawers/sim_drawer.py index bbe1a65..a0c3905 100644 --- a/src/drawers/sim_drawer.py +++ b/src/drawers/sim_drawer.py @@ -1,4 +1,7 @@ import pygame + +from boat import Boat +from environment import Environment from tools import 
rotate_point, add_vector, rotate_vectors @@ -19,11 +22,21 @@ class SimDrawer: CENTER = (250, 250) - def __init__(self, screen): - self._screen = screen + TEXT_COLOR = 255, 255, 255 + SIZE = 800, 600 + BG_COLOR = 0, 0, 255 + + def __init__(self): self._offset = (0, 0) self._scale = 0 + pygame.init() + pygame.font.init() + + self._font = pygame.font.SysFont('Arial', 30) + self._smallfont = pygame.font.SysFont('Arial', 20) + self._screen = pygame.display.set_mode(self.SIZE) + def draw_boat(self, boat): # draw boat vectors = self.BOAT_SHAPE.copy() @@ -64,3 +77,41 @@ def draw_env(self, env): pygame.draw.polygon(self._screen, self.ARROW_COLOR, vectors) pygame.draw.circle(self._screen, (100, 100, 100), self.CENTER, 200, 5) + + def write_text(self, text, row): + pos = 500, 30 + (row * 30) + textsurface = self._font.render(text, True, self.TEXT_COLOR) + self._screen.blit(textsurface, pos) + + def draw_stats(self, boat, env, strategy_name): + # calculate mean of absolute course error + if boat.history.shape[0] > 0: + mae = boat.history.course_error.abs().mean() + else: + mae = 0 + + self.write_text("Boat angle: %.1f°" % boat.boat_angle, 0) + self.write_text("Target angle: %.1f°" % boat.target_angle, 1) + self.write_text("Current deviation: %.1f°" % boat.get_course_error(), 2) + self.write_text("Boat heel: %.1f°" % boat.boat_heel, 3) + self.write_text("Rudder angle: %.1f°" % boat.rudder_angle, 4) + self.write_text("Boat speed: %.1f knots" % boat.speed, 5) + self.write_text("Angle of attack: %.1f°" % boat.get_angle_of_attack(), 6) + self.write_text("Wind direction: %.1f°" % env.wind_direction, 8) + self.write_text("Wind speed: %.1f knots" % env.wind_speed, 9) + self.write_text("MAE: %.1f°" % mae, 11) + self.write_text("Strategy: %s" % strategy_name, 12) + + textsurface = self._smallfont.render( + "Press keys to change: 1/2 for target angle, 3/4 for wind direction, 5/6 for wind speed, s to change strategy, q to quit", True, self.TEXT_COLOR) + self._screen.blit(textsurface, (20, 
565)) + + def draw(self, boat: Boat, env: Environment, strategy_name='Undefined'): + # redraw objects + self._screen.fill(self.BG_COLOR) + self.draw_boat(boat) + self.draw_env(env) + self.draw_stats(boat, env, strategy_name) + + # display new frame + pygame.display.flip() diff --git a/src/gym_sail/__init__.py b/src/gym_sail/__init__.py new file mode 100644 index 0000000..2bee3b7 --- /dev/null +++ b/src/gym_sail/__init__.py @@ -0,0 +1,16 @@ +from gym.envs.registration import register + +register( + id='sail-v0', + entry_point='gym_sail.envs:SailEnv', +) + +register( + id='sail-continuous-v0', + entry_point='gym_sail.envs:SailEnvContinuous', +) + +register( + id='race-continuous-v0', + entry_point='gym_sail.envs:RaceEnvContinuous', +) \ No newline at end of file diff --git a/src/gym_sail/envs/__init__.py b/src/gym_sail/envs/__init__.py new file mode 100644 index 0000000..443f4a4 --- /dev/null +++ b/src/gym_sail/envs/__init__.py @@ -0,0 +1,3 @@ +from gym_sail.envs.sail_env import SailEnv +from gym_sail.envs.sail_env_continuous import SailEnvContinuous +from gym_sail.envs.race_env_continuous import RaceEnvContinuous diff --git a/src/gym_sail/envs/race_env_continuous.py b/src/gym_sail/envs/race_env_continuous.py new file mode 100644 index 0000000..be4caf4 --- /dev/null +++ b/src/gym_sail/envs/race_env_continuous.py @@ -0,0 +1,82 @@ +import gym +from gym import error, spaces, utils +from gym.utils import seeding +import numpy as np +import os +import random + +from boat import * +from environment import Environment +from polar import Polar +from drawers.race_drawer import RaceDrawer +from settings import Settings + + +class RaceEnvContinuous(gym.Env): + metadata = {'render.modes': ['human']} + + def __init__(self): + self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32) + self.observation_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) + + self.seed() + self.observation = None + + # start simulator + polar = 
Polar(os.path.join('data', 'polars', 'first-27.csv')) + self._env = Environment(buoys=Settings.BUOYS) + self._boat = SimBoat(self._env, polar=polar, keep_log=False).set_waypoint(1) + self._drawer = RaceDrawer([self._boat], self._env) + + self.reset() + self._step = 0 + + def render(self, mode='human', close=False): + self._drawer.draw() + + # should we quit? + for event in pygame.event.get(): + if event.type == pygame.KEYDOWN: + if event.key == pygame.K_q: + exit() + + def seed(self, seed=None): + seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + #assert self.action_space.contains(action) + rudder_angle = float(action) * 30 + self._boat.set_target_rudder_angle(rudder_angle) + + # update boat and environment + self._env.update() + self._boat.update() + + mark_reward = self._boat.get_marks_passed() * 100 + reward = mark_reward - self._boat.get_distance_to_waypoint() + + # self._step += 1 + done = False + # if self._step > 300: + # self._step = 0 + # done = True + # print("done")q + + return self.get_observation(), reward, done, {"debug": 123} + + def get_observation(self): + delta = self._boat.get_heading() - self._boat.get_bearing_to_waypoint() + delta = (delta + 180) % 360 - 180 + return ( + delta / 180, + self._boat.rudder_angle / 30, + self._boat.get_angle_of_attack() / 180 + ) + + def reset(self): + self._boat.reset_rudder() + self._boat.reset_boat_position() + self._boat.set_heading(random.randint(-90, 90)) + self._boat.set_waypoint(1) + return self.get_observation() diff --git a/src/gym_sail/envs/sail_env.py b/src/gym_sail/envs/sail_env.py new file mode 100644 index 0000000..dd200f6 --- /dev/null +++ b/src/gym_sail/envs/sail_env.py @@ -0,0 +1,87 @@ +import gym +from gym import error, spaces, utils +from gym.utils import seeding +import numpy as np +import os + +from boat import * +from environment import Environment +from polar import Polar +from drawers.sim_drawer import SimDrawer + + +class SailEnv(gym.Env): + metadata = 
{'render.modes': ['human']} + + def __init__(self): + self.action_space = spaces.Discrete(3) + + self.observation_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32) + + self.seed() + self.observation = None + + # start simulator + polar = Polar(os.path.join('data', 'polars', 'first-27.csv')) + self._env = Environment() + self._boat = SimBoat(self._env, polar=polar, keep_log=False) + self._drawer = SimDrawer() + + self.reset() + self._step = 0 + + def render(self, mode='human', close=False): + self._drawer.draw(self._boat, self._env, 'SailEnv') + + # should we quit? + for event in pygame.event.get(): + if event.type == pygame.KEYDOWN: + if event.key == pygame.K_q: + exit() + + def seed(self, seed=None): + seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action) + + if action == 0: + # steer left + self._boat.steer(-1) + + elif action == 1: + # do nothing + pass + + elif action == 2: + # steer right + self._boat.steer(1) + + # update boat and environment + self._env.update() + self._boat.update() + + reward = -abs(self._boat.get_course_error() / 180) + + self._step += 1 + done = False + if self._step > 300: + self._step = 0 + done = True + + return self.get_observation(), reward, done, {"debug": 123} + + def get_observation(self): + return ( + self._boat.get_course_error() / 180, + self._boat.rudder_angle / 30, + #self._boat.get_angle_of_attack() + ) + + def reset(self): + print("resetting") + self._boat.reset_rudder() + self._env.shuffle() + self._boat.shuffle() + return self.get_observation() diff --git a/src/gym_sail/envs/sail_env_continuous.py b/src/gym_sail/envs/sail_env_continuous.py new file mode 100644 index 0000000..c4a53c0 --- /dev/null +++ b/src/gym_sail/envs/sail_env_continuous.py @@ -0,0 +1,76 @@ +import gym +from gym import error, spaces, utils +from gym.utils import seeding +import numpy as np +import os + +from boat import * +from environment import Environment +from polar 
import Polar +from drawers.sim_drawer import SimDrawer + + +class SailEnvContinuous(gym.Env): + metadata = {'render.modes': ['human']} + + def __init__(self): + self.action_space = spaces.Box(low=-30, high=30, shape=(1,), dtype=np.float32) + self.observation_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32) + + self.seed() + self.observation = None + + # start simulator + polar = Polar(os.path.join('..', '..', 'data', 'polars', 'first-27.csv')) + self._env = Environment() + self._boat = SimBoat(self._env, polar=polar, keep_log=False) + self._drawer = SimDrawer() + + self.reset() + self._step = 0 + + def render(self, mode='human', close=False): + self._drawer.draw(self._boat, self._env) + + # should we quit? + for event in pygame.event.get(): + if event.type == pygame.KEYDOWN: + if event.key == pygame.K_q: + exit() + + def seed(self, seed=None): + seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action) + action = action * 30 + self._boat.set_target_rudder_angle(int(action)) + + # update boat and environment + self._env.update() + self._boat.update() + + reward = -abs(self._boat.get_course_error() / 180) + + self._step += 1 + done = False + if self._step > 300: + self._step = 0 + done = True + + return self.get_observation(), reward, done, {"debug": 123} + + def get_observation(self): + return ( + self._boat.get_course_error() / 180, + self._boat.rudder_angle / 30, + #self._boat.get_angle_of_attack() + ) + + def reset(self): + print("resetting") + self._boat.reset_rudder() + self._env.shuffle() + self._boat.shuffle() + return self.get_observation() diff --git a/src/race_simulator.py b/src/race_simulator.py deleted file mode 100644 index 87e744e..0000000 --- a/src/race_simulator.py +++ /dev/null @@ -1,113 +0,0 @@ -import pygame -import pandas as pd -import threading - -from drawers.race_drawer import RaceDrawer -from settings import Settings - - -class RaceUpdateThread (threading.Thread): - 
"""Thread for updating the steering strategy""" - - def __init__(self, strategies): - threading.Thread.__init__(self) - self._strategies = strategies - self._clock = pygame.time.Clock() - - def run(self): - while 1: - for strategy in self._strategies: - # update steering strategy - strategy.update() - - # update strategy with current fps - fps = self._clock.get_fps() - if fps > 0: - strategy.set_update_fps(fps) - - # sleep remainder of frame - self._clock.tick(Settings.UPDATE_FPS) - - -class RaceSimulator: - SIZE = 1024, 768 - BG_COLOR = 0, 0, 0 - TEXT_COLOR = 255, 255, 255 - - def __init__(self, env, strategies): - self._env = env - self._strategies = strategies - - pygame.init() - pygame.font.init() - - self._font = pygame.font.SysFont('Arial', 30) - self._smallfont = pygame.font.SysFont('Arial', 20) - self._screen = pygame.display.set_mode(self.SIZE) - self._drawer = RaceDrawer(self._screen) - self._clock = pygame.time.Clock() - - def write_text(self, text, row, color=(255, 255, 255)): - pos = 740, 30 + (row * 30) - textsurface = self._font.render(text, True, color) - self._screen.blit(textsurface, pos) - - def run(self): - # scale the race canvas - self._drawer.autoscale(self._env.get_buoys()) - - # start thread for steering strategy - thread = RaceUpdateThread(self._strategies) - thread.daemon = True - thread.start() - - while 1: - self._screen.fill(self.BG_COLOR) - self._env.update() - - self._drawer.draw_env(self._env) - self._drawer.draw_buoys(self._env.get_buoys()) - - # update all boats - scoreboard = [] - for i, strategy in enumerate(self._strategies): - - # update boat and draw - boat = strategy.get_boat() - boat.update() - self._drawer.draw_boat(boat) - - # update scoreboard - scoreboard.append({ - 'name': strategy.get_name(), - 'color': boat.get_boat_color(), - 'marks_passed': boat.get_marks_passed(), - 'dtw': boat.get_distance_to_waypoint() - }) - - # update boat with current fps - fps = self._clock.get_fps() - if fps > 0: - boat.set_draw_fps(fps) - 
- # show scoreboard - scoreboard = pd.DataFrame(scoreboard).sort_values(by=['marks_passed', 'dtw'], ascending=[False, True]) - i = 0 - for _, row in scoreboard.iterrows(): - text = "%s (DTW: %dm)" % (row['name'], row.dtw) - self.write_text(text, i, row.color) - i += 1 - - # display new frame - pygame.display.flip() - - # check key events - for event in pygame.event.get(): - if event.type == pygame.KEYDOWN: - - # save log and quit - if event.key == pygame.K_q: - exit() - - # sleep for the remainder of this frame - self._clock.tick(Settings.DRAW_FPS) diff --git a/src/rl/race_continuous.py b/src/rl/race_continuous.py new file mode 100644 index 0000000..c056cf9 --- /dev/null +++ b/src/rl/race_continuous.py @@ -0,0 +1,81 @@ +import time + +import numpy as np +import gym +import os.path + +from keras.models import Sequential, Model +from keras.layers import Dense, Activation, Flatten, Input, Concatenate +from keras.optimizers import Adam + +from rl.agents import DDPGAgent +from rl.memory import SequentialMemory +from rl.random import OrnsteinUhlenbeckProcess + +import gym_sail + +ENV_NAME = 'race-continuous-v0' + +LOAD = True + +# Get the environment and extract the number of actions. +env = gym.make(ENV_NAME) +np.random.seed(123) +env.seed(123) + +assert len(env.action_space.shape) == 1 +nb_actions = env.action_space.shape[0] + +# Next, we build a very simple model. +# todo back to linear activation? 
+actor = Sequential() +actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) +actor.add(Dense(32)) +actor.add(Activation('relu')) +actor.add(Dense(32)) +actor.add(Activation('relu')) +actor.add(Dense(32)) +actor.add(Activation('relu')) +actor.add(Dense(nb_actions)) +actor.add(Activation('tanh')) +print(actor.summary()) + +action_input = Input(shape=(nb_actions,), name='action_input') +observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input') +flattened_observation = Flatten()(observation_input) +x = Concatenate()([action_input, flattened_observation]) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(1)(x) +x = Activation('linear')(x) +critic = Model(inputs=[action_input, observation_input], outputs=x) +print(critic.summary()) + +# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and +# even the metrics! +memory = SequentialMemory(limit=100000, window_length=1) +random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) +agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, + memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, + random_process=random_process, gamma=.99, target_model_update=1e-3) +agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) + +if LOAD: + # load weights + t = 1548451946 + model_filename = os.path.join('data', 'models', 'ddpg_%s_%d_weights.h5f' % (ENV_NAME, t)) + agent.load_weights(model_filename), +else: + # train + agent.fit(env, nb_steps=10000000, visualize=False, verbose=1, nb_max_episode_steps=4000) + + # After training is done, we save the final weights. 
+ model_filename = os.path.join('data', 'models', 'ddpg_%s_%d_weights.h5f' % (ENV_NAME, int(time.time()))) + agent.save_weights(model_filename, overwrite=True) + +# Finally, evaluate our algorithm for 100 episodes. +agent.test(env, nb_episodes=100, visualize=True, nb_max_episode_steps=4000) \ No newline at end of file diff --git a/src/rl/steer_continuous.py b/src/rl/steer_continuous.py new file mode 100644 index 0000000..eada254 --- /dev/null +++ b/src/rl/steer_continuous.py @@ -0,0 +1,74 @@ +import numpy as np +import gym +import os.path + +from keras.models import Sequential, Model +from keras.layers import Dense, Activation, Flatten, Input, Concatenate +from keras.optimizers import Adam + +from rl.agents import DDPGAgent +from rl.memory import SequentialMemory +from rl.random import OrnsteinUhlenbeckProcess + +import gym_sail + +ENV_NAME = 'sail-continuous-v0' + + +# Get the environment and extract the number of actions. +env = gym.make(ENV_NAME) +np.random.seed(123) +env.seed(123) +assert len(env.action_space.shape) == 1 +nb_actions = env.action_space.shape[0] + +# Next, we build a very simple model. 
+actor = Sequential() +actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) +actor.add(Dense(16)) +actor.add(Activation('relu')) +actor.add(Dense(16)) +actor.add(Activation('relu')) +actor.add(Dense(16)) +actor.add(Activation('relu')) +actor.add(Dense(nb_actions)) +actor.add(Activation('linear')) +print(actor.summary()) + +action_input = Input(shape=(nb_actions,), name='action_input') +observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input') +flattened_observation = Flatten()(observation_input) +x = Concatenate()([action_input, flattened_observation]) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(32)(x) +x = Activation('relu')(x) +x = Dense(1)(x) +x = Activation('linear')(x) +critic = Model(inputs=[action_input, observation_input], outputs=x) +print(critic.summary()) + +# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and +# even the metrics! +memory = SequentialMemory(limit=100000, window_length=1) +random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) +agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, + memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, + random_process=random_process, gamma=.99, target_model_update=1e-3) +agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) + +# Okay, now it's time to learn something! We visualize the training here for show, but this +# slows down training quite a lot. You can always safely abort the training prematurely using +# Ctrl + C. +#agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=200) + +model_filename = os.path.join('..', '..', 'data', 'models', 'ddpg_{}_weights.h5f'.format(ENV_NAME)) +agent.load_weights(model_filename) + +# After training is done, we save the final weights. 
+#agent.save_weights(model_filename, overwrite=True) + +# Finally, evaluate our algorithm for 100 episodes. +agent.test(env, nb_episodes=100, visualize=True, nb_max_episode_steps=200) \ No newline at end of file diff --git a/src/rl/steer_discrete.py b/src/rl/steer_discrete.py new file mode 100644 index 0000000..80e2776 --- /dev/null +++ b/src/rl/steer_discrete.py @@ -0,0 +1,67 @@ +import numpy as np +import gym +import os +import time + +from keras.models import Sequential +from keras.layers import Dense, Activation, Flatten +from keras.optimizers import Adam + +from rl.agents.dqn import DQNAgent +from rl.policy import BoltzmannQPolicy +from rl.memory import SequentialMemory + +# fixed crash that occurs after a while, see: https://github.com/openai/spinningup/issues/16 +os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + +import gym_sail + +ENV_NAME = 'sail-v0' +LOAD = True + +# Get the environment and extract the number of actions. +env = gym.make(ENV_NAME) +np.random.seed(123) +env.seed(123) +nb_actions = env.action_space.n + +# Next, we build a very simple model. +model = Sequential() +model.add(Flatten(input_shape=(3,) + env.observation_space.shape)) +model.add(Dense(16)) +model.add(Activation('relu')) +model.add(Dense(16)) +model.add(Activation('relu')) +model.add(Dense(16)) +model.add(Activation('relu')) +model.add(Dense(nb_actions)) +model.add(Activation('linear')) +print(model.summary()) + +# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and +# even the metrics! 
+memory = SequentialMemory(limit=50000, window_length=3) +policy = BoltzmannQPolicy() +dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50, + target_model_update=1e-2, policy=policy) +dqn.compile(Adam(lr=1e-4), metrics=['mae']) + +# load weights +model_filename = os.path.join('data', 'models', 'ddpg_%s_%d_weights.h5f') +if LOAD: + dqn.load_weights(model_filename % (ENV_NAME, 1554729826)), # 15 minutes of training (6h real time) + # dqn.load_weights(model_filename % (ENV_NAME, 1554731807)), # 1 hour of training (24h real time) + +# Okay, now it's time to learn something! We visualize the training here for show, but this +# slows down training quite a lot. You can always safely abort the training prematurely using +# Ctrl + C. +dqn.fit(env, nb_steps=1000000, visualize=False, verbose=2) + +# After training is done, we save the final weights. +filename = model_filename % (ENV_NAME, time.time()) +dqn.save_weights(filename, overwrite=True) +print("saved weights to %s" % filename) + + +# Finally, evaluate our algorithm for 500 episodes. 
+dqn.test(env, nb_episodes=500, visualize=True) \ No newline at end of file diff --git a/src/rl/steer_test.py b/src/rl/steer_test.py new file mode 100644 index 0000000..3056e0f --- /dev/null +++ b/src/rl/steer_test.py @@ -0,0 +1,24 @@ +import gym +import time + +# # Will be supported in future releases +# from gym.envs import mujoco +# +# mujoco.AntEnv + +import gym_sail + +env = gym.make('sail-v0') + +for i_episode in range(20): + observation = env.reset() + for t in range(100): + env.render() + action = env.action_space.sample() + observation, reward, done, info = env.step(action) + print(observation, reward, action) + if done: + print("Episode finished after {} timesteps".format(t+1)) + break + + #time.sleep(1) diff --git a/src/settings.py b/src/settings.py index 6bfe62c..936b0dd 100644 --- a/src/settings.py +++ b/src/settings.py @@ -6,3 +6,15 @@ class Settings: # number of graphical frames per second DRAW_FPS = 20 + + # scale the race area + BUOY_SCALE = 0.05 + + # definition of buoy positions + BUOYS = [ + (52.3721693, 5.0750607), + (52.3721693 + 0.01 * BUOY_SCALE, 5.0750607), + (52.3721693 + 0.008 * BUOY_SCALE, 5.0750607 + 0.005 * BUOY_SCALE), + (52.3721693 + 0.002 * BUOY_SCALE, 5.0750607 + 0.005 * BUOY_SCALE), + ] + diff --git a/src/simulator.py b/src/simulator.py index 20b47c2..059ae6d 100644 --- a/src/simulator.py +++ b/src/simulator.py @@ -2,6 +2,7 @@ import datetime import time import threading +import tensorflow as tf from boat import Boat from environment import Environment @@ -13,17 +14,23 @@ class UpdateThread (threading.Thread): """Thread for updating the steering strategy""" - def __init__(self, boat: Boat, env: Environment, strategy: Base): + def __init__(self, boat: Boat, env: Environment, strategy: Base, graph): threading.Thread.__init__(self) self._boat = boat self._env = env self._strategy = strategy self._clock = pygame.time.Clock() + self._graph = graph + + def set_strategy(self, strategy: Base): + self._strategy = strategy def run(self): 
while 1: # update steering strategy - self._strategy.update() + # need to set default graph to enable keras models to run in a different thread + with self._graph.as_default(): + self._strategy.update() # sleep remainder of frame self._clock.tick(Settings.UPDATE_FPS) @@ -37,10 +44,6 @@ def run(self): class Simulator: """Single boat simulator""" - SIZE = 800, 600 - BG_COLOR = 0, 0, 255 - TEXT_COLOR = 255, 255, 255 - def __init__(self, boat: Boat, env: Environment, strategies: list, shuffle_interval=10): self._boat = boat self._env = env @@ -51,20 +54,9 @@ def __init__(self, boat: Boat, env: Environment, strategies: list, shuffle_inter self._strategy_id = 0 self._strategy = strategies[self._strategy_id] - pygame.init() - pygame.font.init() - - self._font = pygame.font.SysFont('Arial', 30) - self._smallfont = pygame.font.SysFont('Arial', 20) - self._screen = pygame.display.set_mode(self.SIZE) - self._drawer = SimDrawer(self._screen) + self._drawer = SimDrawer() self._clock = pygame.time.Clock() - def write_text(self, text, row): - pos = 500, 30 + (row * 30) - textsurface = self._font.render(text, True, self.TEXT_COLOR) - self._screen.blit(textsurface, pos) - def run(self): shuffle_time = time.time() + self._shuffle_interval @@ -73,7 +65,7 @@ def run(self): self._boat.shuffle() # start thread for steering strategy - thread = UpdateThread(self._boat, self._env, self._strategy) + thread = UpdateThread(self._boat, self._env, self._strategy, tf.get_default_graph()) thread.daemon = True thread.start() @@ -83,34 +75,7 @@ def run(self): self._boat.update() # redraw objects - self._screen.fill(self.BG_COLOR) - self._drawer.draw_boat(self._boat) - self._drawer.draw_env(self._env) - - # calculate mean of absolute course error - if self._boat.history.shape[0] > 0: - mae = self._boat.history.course_error.abs().mean() - else: - mae = 0 - - self.write_text("Boat angle: %.1f°" % self._boat.boat_angle, 0) - self.write_text("Target angle: %.1f°" % self._boat.target_angle, 1) - 
self.write_text("Current deviation: %.1f°" % self._boat.get_course_error(), 2) - self.write_text("Boat heel: %.1f°" % self._boat.boat_heel, 3) - self.write_text("Rudder angle: %.1f°" % self._boat.rudder_angle, 4) - self.write_text("Boat speed: %.1f knots" % self._boat.speed, 5) - self.write_text("Angle of attack: %.1f°" % self._boat.get_angle_of_attack(), 6) - self.write_text("Wind direction: %.1f°" % self._env.wind_direction, 8) - self.write_text("Wind speed: %.1f knots" % self._env.wind_speed, 9) - self.write_text("MAE: %.1f°" % mae, 11) - self.write_text("Strategy: %s" % type(self._strategy).__name__, 13) - - textsurface = self._smallfont.render( - "Press keys to change: 1/2 for target angle, 3/4 for wind direction, 5/6 for wind speed, s to change strategy, q to quit", True, self.TEXT_COLOR) - self._screen.blit(textsurface, (20, 565)) - - # display new frame - pygame.display.flip() + self._drawer.draw(self._boat, self._env, self._strategy.get_name()) # shuffle once in a while if self._shuffle_interval and time.time() > shuffle_time: @@ -126,6 +91,7 @@ def run(self): if event.key == pygame.K_s: self._strategy_id = self._strategy_id + 1 if self._strategy_id < len(self._strategies) - 1 else 0 self._strategy = self._strategies[self._strategy_id] + thread.set_strategy(self._strategy) # save log and quit if event.key == pygame.K_q: diff --git a/src/simulators/race_simulator.py b/src/simulators/race_simulator.py new file mode 100644 index 0000000..d84ad77 --- /dev/null +++ b/src/simulators/race_simulator.py @@ -0,0 +1,80 @@ +import pygame +import pandas as pd +import threading +import tensorflow as tf + +from drawers.race_drawer import RaceDrawer +from settings import Settings + + +class RaceUpdateThread (threading.Thread): + """Thread for updating the steering strategy""" + + def __init__(self, strategies, graph): + threading.Thread.__init__(self) + self._strategies = strategies + self._clock = pygame.time.Clock() + self._graph = graph + + def run(self): + while 1: + 
for strategy in self._strategies: + + # update steering strategy + # need to set default graph to enable keras models to run in a different thread + with self._graph.as_default(): + strategy.update() + + # update strategy with current fps + fps = self._clock.get_fps() + if fps > 0: + strategy.set_update_fps(fps) + + # sleep remainder of frame + self._clock.tick(Settings.UPDATE_FPS) + + +class RaceSimulator: + + def __init__(self, env, strategies): + self._env = env + self._strategies = strategies + + boats = [strategy.get_boat() for strategy in strategies] + + self._drawer = RaceDrawer(boats, env) + self._clock = pygame.time.Clock() + + def run(self): + + # start thread for steering strategy + thread = RaceUpdateThread(self._strategies, tf.get_default_graph()) + thread.daemon = True + thread.start() + + while 1: + self._env.update() + + # update all boats + for strategy in self._strategies: + boat = strategy.get_boat() + boat.update() + + # update boat with current fps + fps = self._clock.get_fps() + if fps > 0: + boat.set_draw_fps(fps) + + # draw objects + self._drawer.draw() + + # check key events + for event in pygame.event.get(): + if event.type == pygame.KEYDOWN: + + # save log and quit + if event.key == pygame.K_q: + exit() + + # sleep for the remainder of this frame + self._clock.tick(Settings.DRAW_FPS) diff --git a/src/strategies/base.py b/src/strategies/base.py index 06d18e0..4124c50 100644 --- a/src/strategies/base.py +++ b/src/strategies/base.py @@ -29,12 +29,12 @@ def get_upwind_twa(self): def need_to_tack(self) -> bool: """ Do we need to tack? """ - diff = calc_angle(self._boat.target_angle, self._boat._bearing) + diff = calc_angle(self._boat.target_angle, self._boat.get_bearing_to_waypoint()) return abs(diff) > self.get_upwind_twa() * 1.5 def need_to_gybe(self) -> bool: """ Do we need to gybe? 
""" - diff = calc_angle(self._boat.target_angle, self._boat._bearing) + diff = calc_angle(self._boat.target_angle, self._boat.get_bearing_to_waypoint()) return abs(diff) > (180 - self.get_downwind_twa() * 1.5) def set_update_fps(self, fps): diff --git a/src/strategies/default/binary.py b/src/strategies/default/binary.py index 17a0063..87ec439 100644 --- a/src/strategies/default/binary.py +++ b/src/strategies/default/binary.py @@ -1,5 +1,6 @@ from strategies.base import Base + class Binary(Base): def update(self): diff --git a/src/strategies/default/manual.py b/src/strategies/default/manual.py index bfb494a..61a7071 100644 --- a/src/strategies/default/manual.py +++ b/src/strategies/default/manual.py @@ -1,12 +1,15 @@ import pygame from strategies.base import Base + class Manual(Base): - STEERING_FORCE = 1 + STEERING_FORCE = 5 def update(self): pressed = pygame.key.get_pressed() if pressed[pygame.K_LEFT]: self._boat.steer(self.STEERING_FORCE) - if pressed[pygame.K_RIGHT]: + elif pressed[pygame.K_RIGHT]: self._boat.steer(-self.STEERING_FORCE) + else: + self._boat.set_target_rudder_angle(0)