Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ docs/rst
docs/sphinx
experiments/
dist/
build/
rlcard/games/doudizhu/jsondata/
rlcard/agents/gin_rummy_human_agent/gui_cards/cards_png
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _get_version():
'termcolor'
],
extras_require=extras,
- requires_python='>=3.7',
+ python_requires='>=3.7',
classifiers=[
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.10",
Expand Down
157 changes: 157 additions & 0 deletions train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import argparse

import torch

import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import (
get_device,
set_seed,
tournament,
reorganize,
Logger,
plot_curve,
)

def train(args):
    """Train a DQN or NFSP agent against random opponents and save it.

    The learning agent is placed in seat 0 and every remaining seat of the
    environment is filled with a ``RandomAgent``. Performance is logged
    periodically through ``Logger``, a learning curve is plotted with
    ``plot_curve`` and the agent object is written to
    ``<log_dir>/model.pth`` with ``torch.save``.

    Args:
        args: Namespace-like object. The attributes read here are
            ``algorithm``, ``env``, ``seed``, ``num_episodes``,
            ``evaluate_every``, ``num_eval_games`` and ``log_dir``.

    Raises:
        ValueError: If ``args.algorithm`` is neither ``'dqn'`` nor ``'nfsp'``.
    """
    # Reject unsupported algorithms before doing any work. Without this
    # check ``agent`` would never be bound and the function would only fail
    # later with an UnboundLocalError, after the environment had been built.
    if args.algorithm not in ('dqn', 'nfsp'):
        raise ValueError(
            "Unsupported algorithm {!r}; expected 'dqn' or 'nfsp'".format(
                args.algorithm
            )
        )

    # Check whether gpu is available
    device = get_device()

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Make the environment with seed
    env = rlcard.make(
        args.env,
        config={
            'seed': args.seed,
        }
    )

    # Initialize the agent; the agent classes are imported lazily so only
    # the selected algorithm's module is loaded.
    if args.algorithm == 'dqn':
        from rlcard.agents import DQNAgent
        agent = DQNAgent(
            num_actions=env.num_actions,
            state_shape=env.state_shape[0],
            mlp_layers=[64, 64],
            device=device,
        )
    else:  # 'nfsp' (validated above)
        from rlcard.agents import NFSPAgent
        agent = NFSPAgent(
            num_actions=env.num_actions,
            state_shape=env.state_shape[0],
            hidden_layers_sizes=[64, 64],
            q_mlp_layers=[64, 64],
            device=device,
        )

    # Seat 0 is the learning agent; all other seats play randomly.
    agents = [agent]
    for _ in range(1, env.num_players):
        agents.append(RandomAgent(num_actions=env.num_actions))
    env.set_agents(agents)

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):

            if args.algorithm == 'nfsp':
                agents[0].sample_episode_policy()

            # Generate data from the environment
            trajectories, payoffs = env.run(is_training=True)

            # Reorganize the data to be state, action, reward, next_state, done
            trajectories = reorganize(trajectories, payoffs)

            # Feed transitions into agent memory, and train the agent.
            # Only seat 0's trajectory is used: the learning agent always
            # plays the first position and the other players (if any)
            # play randomly.
            for ts in trajectories[0]:
                agent.feed(ts)

            # Evaluate the performance. Play with random agents.
            # Index [0] selects the result for seat 0 (the learning agent).
            if episode % args.evaluate_every == 0:
                logger.log_performance(
                    episode,
                    tournament(
                        env,
                        args.num_eval_games,
                    )[0]
                )

        # Get the paths while the logger is still open
        csv_path, fig_path = logger.csv_path, logger.fig_path

    # Plot the learning curve
    # NOTE(review): done after the Logger context exits, presumably so the
    # CSV is fully written before it is read - confirm against Logger.
    plot_curve(csv_path, fig_path, args.algorithm)

    # Save model (the whole agent object, not just a state dict)
    save_path = os.path.join(args.log_dir, 'model.pth')
    torch.save(agent, save_path)
    print('Model saved in', save_path)

if __name__ == '__main__':
    # Command-line entry point: parse the options, pin the visible CUDA
    # devices, then hand the parsed namespace to train().

    # Environments selectable through --env.
    env_choices = [
        'blackjack',
        'leduc-holdem',
        'limit-holdem',
        'doudizhu',
        'mahjong',
        'no-limit-holdem',
        'uno',
        'gin-rummy',
        'bridge',
    ]

    # Algorithms selectable through --algorithm.
    algorithm_choices = [
        'dqn',
        'nfsp',
    ]

    # One (flag, add_argument keyword arguments) entry per option, listed
    # in the order they are registered with the parser.
    option_specs = [
        ('--env', dict(type=str, default='leduc-holdem', choices=env_choices)),
        ('--algorithm', dict(type=str, default='dqn', choices=algorithm_choices)),
        ('--cuda', dict(type=str, default='')),
        ('--seed', dict(type=int, default=42)),
        ('--num_episodes', dict(type=int, default=5000)),
        ('--num_eval_games', dict(type=int, default=2000)),
        ('--evaluate_every', dict(type=int, default=100)),
        ('--log_dir', dict(type=str, default='experiments/leduc_holdem_dqn_result/')),
    ]

    parser = argparse.ArgumentParser("DQN/NFSP example in RLCard")
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args = parser.parse_args()

    # The --cuda string is exported verbatim as CUDA_VISIBLE_DEVICES before
    # training starts; its default is the empty string.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    train(args)