laboroai · taku-y · Aug 10, 2025 · Aug 10, 2025 · Aug 10, 2025 · Aug 10, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
       strategy:
         matrix:
           os: [ubuntu-latest, windows-latest, macOS-latest]
-          rust: [1.84.0]
+          rust: [1.85.0]
           python-version: ["3.11"]
       steps:
         - uses: actions/checkout@v2

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 
-## v0.0.8 (2025-??-??)
+## v0.0.9 (2025-??-??)
+
+### Changed
+
+* Move the generic replay buffer into a separate crate (`border-generic-replay-buffer`).
+
+## v0.0.8 (2025-05-17)
 
 ### Added
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,7 @@
 [workspace]
 members = [
     "border-core",
+    "border-generic-replay-buffer",
     "border-tensorboard",
     "border-mlflow-tracking",
     "border-py-gym-env",
@@ -15,14 +16,15 @@ members = [
 exclude = ["docker/", "examples/"]
 
 [workspace.package]
-version = "0.0.8"
+version = "0.0.9"
 edition = "2018"
-rust-version = "1.84"
+rust-version = "1.85"
 description = "Reinforcement learning library"
 repository = "https://github.com/laboroai/border"
 keywords = ["reinforcement", "learning", "rl"]
 categories = ["science"]
 license = "MIT OR Apache-2.0"
+readme = "README.md"
 
 [workspace.dependencies]
 clap = { version = "4.5.8", features = ["derive"] }

diff --git a/README.md b/README.md
@@ -11,18 +11,19 @@ Border consists of the following crates:
 
 * Core and utility
   * [border-core](https://crates.io/crates/border-core) ([doc](https://docs.rs/border-core/latest/border_core/)) provides basic traits and functions for environments and reinforcement learning (RL) agents.
-  * [border-tensorboard](https://crates.io/crates/border-tensorboard) ([doc](https://docs.rs/border-core/latest/border_tensorboard/)) implements the `TensorboardRecorder` struct for writing records that can be visualized in Tensorboard, based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
-  * [border-mlflow-tracking](https://crates.io/crates/border-mlflow-tracking) ([doc](https://docs.rs/border-core/latest/border_mlflow_tracking/)) provides MLflow tracking support for logging metrics during training via REST API.
-  * [border-async-trainer](https://crates.io/crates/border-async-trainer) ([doc](https://docs.rs/border-core/latest/border_async_trainer/)) defines traits and functions for asynchronous training of RL agents using multiple actors. Each actor runs a sampling process in parallel, where an agent interacts with an environment to collect samples for a shared replay buffer.
+  * [border-generic-replay-buffer](https://crates.io/crates/border-generic-replay-buffer) ([doc](https://docs.rs/border-generic-replay-buffer/latest/border_generic_replay_buffer/)) provides a generic implementation of a replay buffer.
+  * [border-tensorboard](https://crates.io/crates/border-tensorboard) ([doc](https://docs.rs/border-tensorboard/latest/border_tensorboard/)) implements the `TensorboardRecorder` struct for writing records that can be visualized in Tensorboard, based on [tensorboard-rs](https://crates.io/crates/tensorboard-rs).
+  * [border-mlflow-tracking](https://crates.io/crates/border-mlflow-tracking) ([doc](https://docs.rs/border-mlflow-tracking/latest/border_mlflow_tracking/)) provides MLflow tracking support for logging metrics during training via REST API.
+  * [border-async-trainer](https://crates.io/crates/border-async-trainer) ([doc](https://docs.rs/border-async-trainer/latest/border_async_trainer/)) defines traits and functions for asynchronous training of RL agents using multiple actors. Each actor runs a sampling process in parallel, where an agent interacts with an environment to collect samples for a shared replay buffer.
   * [border](https://crates.io/crates/border) serves as a collection of examples.
 * Environment
-  * [border-py-gym-env](https://crates.io/crates/border-py-gym-env) ([doc](https://docs.rs/border-core/latest/border_py_gym_env/)) provides a wrapper for [Gymnasium](https://gymnasium.farama.org) environments written in Python.
-  * [border-atari-env](https://crates.io/crates/border-atari-env) ([doc](https://docs.rs/border-core/latest/border_atari_env/)) implements a wrapper for [atari-env](https://crates.io/crates/atari-env), which is part of [gym-rs](https://crates.io/crates/gym-rs).
-  * [border-minari](https://crates.io/crates/border-minari) ([doc](https://docs.rs/border-core/latest/border_minari/)) provides a wrapper for [Minari](https://minari.farama.org).
+  * [border-py-gym-env](https://crates.io/crates/border-py-gym-env) ([doc](https://docs.rs/border-py-gym-env/latest/border_py_gym_env/)) provides a wrapper for [Gymnasium](https://gymnasium.farama.org) environments written in Python.
+  * [border-atari-env](https://crates.io/crates/border-atari-env) ([doc](https://docs.rs/border-atari-env/latest/border_atari_env/)) implements a wrapper for [atari-env](https://crates.io/crates/atari-env), which is part of [gym-rs](https://crates.io/crates/gym-rs).
+  * [border-minari](https://crates.io/crates/border-minari) ([doc](https://docs.rs/border-minari/latest/border_minari/)) provides a wrapper for [Minari](https://minari.farama.org).
 * Agent
-  * [border-tch-agent](https://crates.io/crates/border-tch-agent) ([doc](https://docs.rs/border-core/latest/border_tch_agent/)) implements RL agents based on [tch](https://crates.io/crates/tch), including Deep Q Network (DQN), Implicit Quantile Network (IQN), and Soft Actor-Critic (SAC).
-  * [border-candle-agent](https://crates.io/crates/border-candle-agent) ([doc](https://docs.rs/border-core/latest/border_candle_agent/)) implements RL agents based on [candle](https://crates.io/crates/candle-core).
-  * [border-policy-no-backend](https://crates.io/crates/border-policy-no-backend) ([doc](https://docs.rs/border-core/latest/border_policy_no_backend/)) implements policies that are independent of any deep learning backend, such as Torch.
+  * [border-tch-agent](https://crates.io/crates/border-tch-agent) ([doc](https://docs.rs/border-tch-agent/latest/border_tch_agent/)) implements RL agents based on [tch](https://crates.io/crates/tch), including Deep Q Network (DQN), Implicit Quantile Network (IQN), and Soft Actor-Critic (SAC).
+  * [border-candle-agent](https://crates.io/crates/border-candle-agent) ([doc](https://docs.rs/border-candle-agent/latest/border_candle_agent/)) implements RL agents based on [candle](https://crates.io/crates/candle-core).
+  * [border-policy-no-backend](https://crates.io/crates/border-policy-no-backend) ([doc](https://docs.rs/border-policy-no-backend/latest/border_policy_no_backend/)) implements policies that are independent of any deep learning backend, such as Torch.
 
 ## Status
 
@@ -38,16 +39,17 @@ Docker configuration files for development and testing are available in the [dev
 
 ## License
 
-Crates                    | License
---------------------------|------------------
-`border-core`             | MIT OR Apache-2.0
-`border-tensorboard`      | MIT OR Apache-2.0
-`border-mlflow-tracking`  | MIT OR Apache-2.0
-`border-async-trainer`    | MIT OR Apache-2.0
-`border-py-gym-env`       | MIT OR Apache-2.0
-`border-atari-env`        | GPL-2.0-or-later
-`border-minari`           | MIT OR Apache-2.0
-`border-tch-agent`        | MIT OR Apache-2.0
-`border-candle-agent`     | MIT OR Apache-2.0
-`border-policy-no-backend`| MIT OR Apache-2.0
-`border`                  | GPL-2.0-or-later
+Crates                        | License
+------------------------------|------------------
+`border-core`                 | MIT OR Apache-2.0
+`border-generic-replay-buffer`| MIT OR Apache-2.0
+`border-tensorboard`          | MIT OR Apache-2.0
+`border-mlflow-tracking`      | MIT OR Apache-2.0
+`border-async-trainer`        | MIT OR Apache-2.0
+`border-py-gym-env`           | MIT OR Apache-2.0
+`border-atari-env`            | GPL-2.0-or-later
+`border-minari`               | MIT OR Apache-2.0
+`border-tch-agent`            | MIT OR Apache-2.0
+`border-candle-agent`         | MIT OR Apache-2.0
+`border-policy-no-backend`    | MIT OR Apache-2.0
+`border`                      | GPL-2.0-or-later
diff --git a/border-async-trainer/Cargo.toml b/border-async-trainer/Cargo.toml
@@ -12,8 +12,8 @@ readme = "README.md"
 [dependencies]
 anyhow = { workspace = true }
 aquamarine = { workspace = true }
-border-core = { version = "0.0.8", path = "../border-core" }
-border-tensorboard = { version = "0.0.8", path = "../border-tensorboard" }
+border-core = { version = "0.0.9", path = "../border-core" }
+border-tensorboard = { version = "0.0.9", path = "../border-tensorboard" }
 serde = { workspace = true, features = ["derive"] }
 log = { workspace = true }
 tokio = { version = "1.14.0", features = ["full"] }
@@ -24,4 +24,5 @@ thiserror = { workspace = true }
 
 [dev-dependencies]
 env_logger = { workspace = true }
+border-generic-replay-buffer = { version = "0.0.9", path = "../border-generic-replay-buffer" }
 test-log = "0.2.8"
diff --git a/border-async-trainer/README.md b/border-async-trainer/README.md
@@ -0,0 +1,160 @@
+Asynchronous trainer with parallel sampling processes.
+
+A typical usage looks like the following.
+
+```
+# use serde::{Deserialize, Serialize};
+# use border_generic_replay_buffer::test::{
+#     TestAgent, TestAgentConfig, TestEnv, TestObs, TestObsBatch,
+#     TestAct, TestActBatch
+# };
+# use border_core::Env as _;
+# use border_async_trainer::{
+#     //test::{TestAgent, TestAgentConfig, TestEnv},
+#     ActorManager, ActorManagerConfig, AsyncTrainer, AsyncTrainerConfig,
+# };
+# use border_generic_replay_buffer::{
+#     GenericReplayBuffer, GenericReplayBufferConfig,
+#     SimpleStepProcessorConfig, SimpleStepProcessor
+# };
+# use border_core::{
+#     record::{Recorder, NullRecorder}, DefaultEvaluator,
+# };
+#
+# use std::path::{Path, PathBuf};
+#
+# fn agent_config() -> TestAgentConfig {
+#     TestAgentConfig
+# }
+#
+# fn env_config() -> usize {
+#     0
+# }
+
+type Env = TestEnv;
+type ObsBatch = TestObsBatch;
+type ActBatch = TestActBatch;
+type ReplayBuffer = GenericReplayBuffer<ObsBatch, ActBatch>;
+type StepProcessor = SimpleStepProcessor<Env, ObsBatch, ActBatch>;
+
+// Create a new agent by wrapping the existing agent in order to implement SyncModel.
+struct TestAgent2(TestAgent);
+
+impl border_core::Configurable for TestAgent2 {
+    type Config = TestAgentConfig;
+
+    fn build(config: Self::Config) -> Self {
+        Self(TestAgent::build(config))
+    }
+}
+
+impl border_core::Agent<Env, ReplayBuffer> for TestAgent2 {
+    // Boilerplate code to delegate the method calls to the inner agent.
+    fn train(&mut self) {
+        self.0.train();
+     }
+
+     // For other methods ...
+#     fn is_train(&self) -> bool {
+#         self.0.is_train()
+#     }
+#
+#     fn eval(&mut self) {
+#         self.0.eval();
+#     }
+#
+#     fn opt_with_record(&mut self, buffer: &mut ReplayBuffer) -> border_core::record::Record {
+#         self.0.opt_with_record(buffer)
+#     }
+#
+#     fn save_params(&self, path: &Path) -> anyhow::Result<Vec<PathBuf>> {
+#         self.0.save_params(path)
+#     }
+#
+#     fn load_params(&mut self, path: &Path) -> anyhow::Result<()> {
+#         self.0.load_params(path)
+#     }
+#
+#     fn opt(&mut self, buffer: &mut ReplayBuffer) {
+#         self.0.opt_with_record(buffer);
+#     }
+#
+#     fn as_any_ref(&self) -> &dyn std::any::Any {
+#         self
+#     }
+#
+#     fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
+#         self
+#     }
+}
+
+impl border_core::Policy<Env> for TestAgent2 {
+      // Boilerplate code to delegate the method calls to the inner agent.
+      // ...
+#     fn sample(&mut self, obs: &TestObs) -> TestAct {
+#         self.0.sample(obs)
+#     }
+}
+
+impl border_async_trainer::SyncModel for TestAgent2{
+    // Self::ModelInfo should include the model parameters.
+    type ModelInfo = usize;
+
+
+    fn model_info(&self) -> (usize, Self::ModelInfo) {
+        // Extracts the model parameters and returns them as Self::ModelInfo.
+        // The first element of the tuple is the number of optimization steps.
+        (0, 0)
+    }
+
+    fn sync_model(&mut self, _model_info: &Self::ModelInfo) {
+        // implements synchronization of the model based on the _model_info
+    }
+}
+
+let agent_configs: Vec<_> = vec![agent_config()];
+let env_config_train = env_config();
+let env_config_eval = env_config();
+let replay_buffer_config = GenericReplayBufferConfig::default();
+let step_proc_config = SimpleStepProcessorConfig::default();
+let actor_man_config = ActorManagerConfig::default();
+let async_trainer_config = AsyncTrainerConfig::default();
+let mut recorder: Box<dyn Recorder<_, _>> = Box::new(NullRecorder::new());
+let mut evaluator = DefaultEvaluator::<TestEnv>::new(&env_config_eval, 0, 1).unwrap();
+
+border_async_trainer::util::train_async::<TestAgent2, _, _, StepProcessor>(
+    &agent_config(),
+    &agent_configs,
+    &env_config_train,
+    &env_config_eval,
+    &step_proc_config,
+    &replay_buffer_config,
+    &actor_man_config,
+    &async_trainer_config,
+    &mut recorder,
+    &mut evaluator,
+);
+```
+
+Training process consists of the following two components:
+
+* [`ActorManager`] manages [`Actor`]s, each of which runs a thread in which an
+  [`Agent`] interacts with an [`Env`] and collects samples. Those samples are sent to
+  the replay buffer in [`AsyncTrainer`].
+* [`AsyncTrainer`] is responsible for training an agent. It also runs a thread
+  for pushing samples from [`ActorManager`] into a replay buffer.
+
+The `Agent` must implement the [`SyncModel`] trait in order to synchronize the model of
+the agent in [`Actor`] with the trained agent in [`AsyncTrainer`]. The trait provides
+methods to export and import the model information as
+[`SyncModel`]`::ModelInfo`.
+
+The `Agent` in [`AsyncTrainer`] is responsible for training, typically with a GPU,
+while the `Agent`s in the [`Actor`]s of [`ActorManager`] are responsible for sampling
+using the CPU.
+
+Both [`AsyncTrainer`] and [`ActorManager`] run on the same machine and
+communicate via channels.
+
+[`Agent`]: border_core::Agent
+[`Env`]: border_core::Env
diff --git a/border-async-trainer/src/actor/base.rs b/border-async-trainer/src/actor/base.rs
@@ -1,6 +1,6 @@
 use crate::{ActorStat, PushedItemMessage, ReplayBufferProxy, ReplayBufferProxyConfig, SyncModel};
 use border_core::{
-    Agent, Configurable, Env, ExperienceBufferBase, ReplayBufferBase, Sampler, StepProcessor,
+    Agent, Configurable, Env, ExperienceBuffer, ReplayBuffer, Sampler, StepProcessor,
 };
 use crossbeam_channel::Sender;
 use log::{debug, info};
@@ -21,7 +21,7 @@ use std::{
 ///     B-->|Env::Obs|A
 ///     B-->|Step&ltE: Env&gt|C[StepProcessor]
 ///   end
-///   C-->|ReplayBufferBase::PushedItem|F[ReplayBufferProxy]
+///   C-->|ReplayBuffer::PushedItem|F[ReplayBufferProxy]
 /// ```
 ///
 /// In [`Actor`], an [`Agent`] runs on an [`Env`] and generates [`Step`] objects.
@@ -41,7 +41,7 @@ where
     A: Agent<E, R> + Configurable + SyncModel + 'static,
     E: Env,
     P: StepProcessor<E>,
-    R: ExperienceBufferBase<Item = P::Output> + ReplayBufferBase,
+    R: ExperienceBuffer<Item = P::Output> + ReplayBuffer,
 {
     /// Stops sampling process if this field is set to `true`.
     id: usize,
@@ -60,7 +60,7 @@ where
     A: Agent<E, R> + Configurable + SyncModel + 'static,
     E: Env,
     P: StepProcessor<E>,
-    R: ExperienceBufferBase<Item = P::Output> + ReplayBufferBase,
+    R: ExperienceBuffer<Item = P::Output> + ReplayBuffer,
 {
     pub fn build(
         id: usize,

diff --git a/border-async-trainer/src/actor_manager/base.rs b/border-async-trainer/src/actor_manager/base.rs
@@ -1,9 +1,7 @@
 use crate::{
     Actor, ActorManagerConfig, ActorStat, PushedItemMessage, ReplayBufferProxyConfig, SyncModel,
 };
-use border_core::{
-    Agent, Configurable, Env, ExperienceBufferBase, ReplayBufferBase, StepProcessor,
-};
+use border_core::{Agent, Configurable, Env, ExperienceBuffer, ReplayBuffer, StepProcessor};
 use crossbeam_channel::{bounded, /*unbounded,*/ Receiver, Sender};
 use log::info;
 use std::{
@@ -25,7 +23,7 @@ where
     A: Agent<E, R> + Configurable + SyncModel,
     E: Env,
     P: StepProcessor<E>,
-    R: ExperienceBufferBase<Item = P::Output> + ReplayBufferBase,
+    R: ExperienceBuffer<Item = P::Output> + ReplayBuffer,
 {
     /// Configurations of [`Agent`]s.
     agent_configs: Vec<A::Config>,
@@ -72,7 +70,7 @@ where
     A: Agent<E, R> + Configurable + SyncModel + 'static,
     E: Env,
     P: StepProcessor<E>,
-    R: ExperienceBufferBase<Item = P::Output> + Send + 'static + ReplayBufferBase,
+    R: ExperienceBuffer<Item = P::Output> + Send + 'static + ReplayBuffer,
     A::Config: Send + 'static,
     E::Config: Send + 'static,
     P::Config: Send + 'static,