diff --git a/environments/two_stage_train.py b/environments/two_stage_train.py
index 3bf2764..a67470f 100644
--- a/environments/two_stage_train.py
+++ b/environments/two_stage_train.py
@@ -1,7 +1,10 @@
 import copy
 from ray.rllib.env import MultiAgentEnv
-from ray.rllib.agents import ppo
+try:
+    from ray.rllib.agents import ppo
+except ImportError:
+    from ray.rllib.algorithms import ppo
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.env.apis.task_settable_env import TaskSettableEnv
 import gym

diff --git a/experiment_configs/cleanup-contracting.json b/experiment_configs/cleanup-contracting.json
index a49e196..a793543 100644
--- a/experiment_configs/cleanup-contracting.json
+++ b/experiment_configs/cleanup-contracting.json
@@ -11,7 +11,7 @@
         "post_fcnet_hiddens": [64,64]
         ,"conv_filters":[[6,[3,3],1]]
     },
-    "wandb":true,
+    "wandb":false,
     "num_renders":5,
     "solver": true,
     "solver_samples":100
diff --git a/run_render.py b/run_render.py
index e0fac4d..be5f388 100644
--- a/run_render.py
+++ b/run_render.py
@@ -4,7 +4,10 @@
 """

 import copy
-from ray.rllib.agents import ppo
+try:
+    from ray.rllib.agents import ppo
+except ImportError:
+    from ray.rllib.algorithms import ppo
 from PIL import Image
 from utils.ray_config_utils import get_config_and_env
 from environments.env_utils import make_video_from_rgb_imgs
diff --git a/run_solver.py b/run_solver.py
index 14b5c5c..2f8d5d9 100644
--- a/run_solver.py
+++ b/run_solver.py
@@ -7,7 +7,10 @@

 import copy

-from ray.rllib.agents import ppo
+try:
+    from ray.rllib.agents import ppo
+except ImportError:
+    from ray.rllib.algorithms import ppo
 from PIL import Image
 from utils.ray_config_utils import get_config_and_env,get_neg_config,get_solver_config
 from environments.env_utils import make_video_from_rgb_imgs
diff --git a/run_training.py b/run_training.py
index ddb70e1..96ad147 100644
--- a/run_training.py
+++ b/run_training.py
@@ -66,7 +66,7 @@ def ppo_learning(params_dict,config,wb_logger) :
     del config['stop_cond']
     analysis = ray.tune.tune.run('PPO',name=params_dict['exp_name'],stop=stop_condition,
-                        config=config,callbacks=[wb_logger],local_dir=params_dict['results_dir'],
+                        config=config,callbacks=[wb_logger],storage_path=params_dict['results_dir'],
                         num_samples=1, verbose=0 ,checkpoint_freq=10, checkpoint_at_end=True)

     checkpoint_paths = []
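The same version-guarded import is applied in `environments/two_stage_train.py`, `run_render.py`, and `run_solver.py` above. As context for the pattern: Ray moved RLlib's trainers from `ray.rllib.agents` to `ray.rllib.algorithms` around Ray 2.0, and the PPO trainer class was renamed along the way. A minimal standalone sketch, not taken from this repo:

```python
# Version-guarded import: Ray < 2.0 ships PPO under ray.rllib.agents,
# newer releases ship it under ray.rllib.algorithms.
try:
    from ray.rllib.agents import ppo      # older Ray
except ImportError:
    from ray.rllib.algorithms import ppo  # Ray >= 2.0

# The trainer class was also renamed (PPOTrainer -> PPO), so downstream
# code may need a similar guard to stay version-agnostic:
PPOTrainer = getattr(ppo, "PPOTrainer", None) or ppo.PPO
```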
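Relatedly, `run_training.py` now passes `storage_path` instead of `local_dir`, matching the rename in newer Ray Tune releases (around Ray 2.5, `local_dir` was deprecated in favor of `storage_path`). A hedged sketch of the call shape, with placeholder values rather than this repo's actual config:

```python
from ray import tune

# Placeholder stop condition and config; the repo builds these from its
# experiment JSON instead.
analysis = tune.run(
    "PPO",
    name="example_experiment",
    stop={"timesteps_total": 100_000},
    config={"env": "CartPole-v1", "framework": "torch"},
    storage_path="/absolute/path/to/results",  # newer Ray expects an absolute path or URI
    checkpoint_freq=10,
    checkpoint_at_end=True,
)
```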
""" import multiprocessing,time, os @@ -23,13 +23,13 @@ def main_run(config,args,hook_start=True,hook_end=True) : def get_args() : parser= argparse.ArgumentParser() - parser.add_argument('--name' ,type = str , help= 'Name for campaign to run') + parser.add_argument('--name' ,type = str , default='cleanup-v1') parser.add_argument('--mp',action='store_true',default=False,help='Whether to use multiprocesssing') parser.add_argument('--workers', type = int , default=1, help = 'Only required if multiprocessing is true: Total number of workers, must be larger than the max worker requirement in the config dict') parser.add_argument('--w_per_job', type = int , default=1 , help = 'Only required if multiprocessing is true: Total number of workers, must be larger than the max worker requirement in the config dict') parser.add_argument('--gpu',action='store_true',default=False,help='Whether to use gpu') - parser.add_argument('--config_path' ,type = str , default=None , help= 'Name for campaign to run') - parser.add_argument('--results_dir',type =str, default = 'results/') + parser.add_argument('--config_path' ,type = str , default= 'experiment_configs/test.json') + parser.add_argument('--results_dir',type =str, help='add in absolute file path', default = '~/Programming/MARL_contracts/results/') parser.add_argument('--load_second_stage',type=str,default=None) parser.add_argument('--seeds',type=int,default=1) @@ -80,6 +80,7 @@ def mp_run(num_jobs,total_jobs,args,config_dict_list) : prev_alive = alive time.sleep(30) + if __name__ =='__main__' : # Get arguments args = get_args() @@ -155,7 +156,7 @@ def mp_run(num_jobs,total_jobs,args,config_dict_list) : config_dict_list = copy.deepcopy(new_config_list ) if args.name is not None : - args.name += time.strftime("_%Y-%m-%d") + args.name += time.strftime("_%Y-%m-%d-%I:%M:%S") args.results_dir += args.name os.mkdir(args.results_dir) diff --git a/utils/ray_config_utils.py b/utils/ray_config_utils.py index 689dbd4..148bfeb 100644 --- a/utils/ray_config_utils.py +++ b/utils/ray_config_utils.py @@ -21,7 +21,7 @@ import numpy as np from utils.env_creator_functions import env_creator, get_base_env_tag from utils.logger_utils import MetricsCallback -from ray.rllib.algorithms.callbacks import MultiCallbacks +from ray.rllib.algorithms.callbacks import make_multi_callbacks from ray.rllib.models import ModelCatalog from environments.Networks.vision_net import VisionNetwork @@ -148,7 +148,7 @@ def get_config_and_env(params_dict): if params_dict['shared_policy'] else (lambda agent_id, episode, worker, **kwargs: agent_id) }, - 'callbacks': MultiCallbacks([lambda : MetricsCallback()]), + 'callbacks': make_multi_callbacks([lambda : MetricsCallback()]), 'num_gpus': params_dict['num_gpus'], 'stop_cond': {'timesteps_total': params_dict.get("num_timesteps")} }