diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml
new file mode 100644
index 0000000..b9dfbe6
--- /dev/null
+++ b/.github/workflows/Format.yml
@@ -0,0 +1,35 @@
+name: Format
+on:  # triggers: pushes to main, every pull request, and manual dispatch
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}  # true only for refs/pull/* refs
+jobs:
+  format-check:
+    name: JuliaFormatter
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      actions: write  # NOTE(review): presumably lets julia-actions/cache delete stale caches; confirm
+      contents: read
+    steps:
+      - uses: actions/checkout@v7
+      - uses: julia-actions/setup-julia@v3
+        with:
+          version: '1'
+      - uses: julia-actions/cache@v3
+      - name: Run JuliaFormatter
+        shell: julia --color=yes {0}  # custom shell: the script under `run` is executed by Julia
+        run: |
+          using Pkg
+          Pkg.activate(; temp=true)
+          Pkg.add(name="JuliaFormatter", version="2")
+          using JuliaFormatter
+          if !format(".", verbose=true, overwrite=false)
+              @error "Code is not formatted. Run `julia -e 'using JuliaFormatter; format(\".\")'` locally."
+              exit(1)
+          end
diff --git a/Project.toml b/Project.toml
index a4b06c5..fc60d0c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,7 +19,7 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
 
 [compat]
-DecisionFocusedLearningBenchmarks = "0.5.0, 0.6"
+DecisionFocusedLearningBenchmarks = "0.6.1"
 DocStringExtensions = "0.9.5"
 Flux = "0.16.9"
 InferOpt = "0.7.1"
diff --git a/src/DecisionFocusedLearningAlgorithms.jl b/src/DecisionFocusedLearningAlgorithms.jl
index d7a6250..38d9ec6 100644
--- a/src/DecisionFocusedLearningAlgorithms.jl
+++ b/src/DecisionFocusedLearningAlgorithms.jl
@@ -25,6 +25,7 @@ include("algorithms/abstract_algorithm.jl")
 include("algorithms/supervised/fyl.jl")
 include("algorithms/supervised/anticipative_imitation.jl")
 include("algorithms/supervised/dagger.jl")
+include("algorithms/mirror_descent/mirror_descent.jl")
 
 export TrainingContext
 
@@ -41,7 +42,7 @@ export AbstractMetric,
 
 export AbstractAlgorithm, AbstractImitationAlgorithm
 export PerturbedFenchelYoungLossImitation,
-    DAgger, AnticipativeImitation, train_policy!, train_policy
+    DAgger, AnticipativeImitation, train_policy!, train_policy, MirrorDescent
 export AbstractPolicy, DFLPolicy
 
 end
diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl
new file mode 100644
index 0000000..a4b65d9
--- /dev/null
+++ b/src/algorithms/mirror_descent/mirror_descent.jl
@@ -0,0 +1,239 @@
+"""
+$TYPEDEF
+
+Mirror Descent algorithm for learning coordinated solutions on stochastic benchmarks.
+
+Each iteration relabels the training samples with solver output, then runs `inner_algorithm`.
+
+Reference: <https://arxiv.org/abs/2505.04757>
+
+# Fields
+$TYPEDFIELDS
+"""
+@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm
+    "inner imitation algorithm run on the relabelled dataset at every iteration"
+    inner_algorithm::A = PerturbedFenchelYoungLossImitation()
+end
+
+# Relabel each sample of `dataset` with the solution `anticipative_solver` returns for
+# that sample's scenario (the sample context is splatted in as keyword arguments).
+# The result is a new collection built with `map`.
+function _augment_with_anticipative(dataset, anticipative_solver)
+    relabel(s) = DataSample(s; y=anticipative_solver(s.scenario; s.context...))
+    return map(relabel, dataset)
+end
+
+# Relabel one sample: feed ±κ·θ (minus sign when minimizing) to `perturbed_solver`.
+function _perturbed_sample(sample, model, perturbed_solver, is_minimization, κ)
+    scale = is_minimization ? -κ : κ
+    direction = scale * model(sample.x)
+    solution = perturbed_solver(direction; scenario=sample.scenario, sample.context...)
+    return DataSample(sample; y=solution)
+end
+
+# Out-of-place relabelling with perturbed solutions: `map` produces a new collection of
+# samples, so the container passed in keeps its original entries.
+function _augment_with_perturbed(dataset, model, perturbed_solver, is_minimization; κ=1.0)
+    relabel(s) = _perturbed_sample(s, model, perturbed_solver, is_minimization, κ)
+    return map(relabel, dataset)
+end
+
+# In-place relabelling with perturbed solutions: every slot of `dataset` is overwritten
+# with its relabelled sample, and the same container is returned.
+function _augment_with_perturbed!(dataset, model, perturbed_solver, is_minimization; κ=1.0)
+    relabel(s) = _perturbed_sample(s, model, perturbed_solver, is_minimization, κ)
+    for idx in eachindex(dataset)
+        dataset[idx] = relabel(dataset[idx])
+    end
+    return dataset
+end
+
+# Run `md_iters` mirror descent iterations and return the results of the inner
+# `train_policy!` calls, one entry per iteration.
+function _mirror_descent_loop(
+    algorithm,
+    policy,
+    input_dataset,
+    perturbed_solver,
+    is_minimization;
+    md_iters,
+    epochs,
+    κ,
+    metrics,
+    verbose,
+)
+    # The first relabelling is out-of-place so `input_dataset` keeps its labels; later
+    # iterations overwrite this working copy instead of allocating a new one.
+    working = _augment_with_perturbed(
+        input_dataset, policy.statistical_model, perturbed_solver, is_minimization; κ
+    )
+    return map(1:md_iters) do n_it
+        verbose && println("Mirror descent iteration $n_it / $md_iters")
+        n_it == 1 || _augment_with_perturbed!(
+            working, policy.statistical_model, perturbed_solver, is_minimization; κ
+        )
+        return train_policy!(algorithm.inner_algorithm, policy, working; epochs, metrics)
+    end
+end
+
+"""
+$TYPEDSIGNATURES
+
+Train `policy` in place with the Mirror Descent algorithm on `train_dataset` and return
+the vector of inner training histories, one per iteration and in execution order.
+
+With `imitation_start=true`, the first iteration is a pure imitation step on labels from
+`anticipative_solver`; every other iteration first relabels the data with
+`perturbed_anticipative_solver`.
+
+# Arguments
+- `epochs=10`: number of inner training epochs run in every iteration
+- `iterations=10`: total iteration count; with `imitation_start=true` this includes the
+  imitation step, which runs even when `iterations < 1`
+- `κ=1.0`: factor multiplying `θ` before it is handed to the perturbed solver
+- `metrics::Tuple=()`: metrics passed through to the inner training algorithm
+- `verbose=false`: if true, print a progress line for each iteration
+- `imitation_start=true`: begin with a pure imitation step on anticipative solutions
+- `is_minimization=true`: negate `θ` for the perturbed solver; pass false to maximize
+"""
+function train_policy!(
+    algorithm::MirrorDescent,
+    policy::DFLPolicy,
+    train_dataset,
+    anticipative_solver,
+    perturbed_anticipative_solver;
+    epochs=10,
+    iterations=10,
+    κ=1.0,
+    metrics::Tuple=(),
+    verbose::Bool=false,
+    imitation_start::Bool=true,
+    is_minimization::Bool=true,
+)
+    if !imitation_start
+        return _mirror_descent_loop(
+            algorithm,
+            policy,
+            train_dataset,
+            perturbed_anticipative_solver,
+            is_minimization;
+            md_iters=iterations,
+            epochs,
+            κ,
+            metrics,
+            verbose,
+        )
+    end
+
+    verbose && println("Imitation step")
+    expert_dataset = _augment_with_anticipative(train_dataset, anticipative_solver)
+    imitation_history = train_policy!(
+        algorithm.inner_algorithm, policy, expert_dataset; epochs, metrics
+    )
+    remaining = iterations - 1
+    remaining >= 1 || return [imitation_history]
+    md_histories = _mirror_descent_loop(
+        algorithm,
+        policy,
+        expert_dataset,
+        perturbed_anticipative_solver,
+        is_minimization;
+        md_iters=remaining,
+        epochs,
+        κ,
+        metrics,
+        verbose,
+    )
+    return pushfirst!(md_histories, imitation_history)
+end
+
+"""
+$TYPEDSIGNATURES
+
+Build a `DFLPolicy` for `benchmark`, train it with the Mirror Descent algorithm, and
+return `(histories_per_iteration, policy)`.
+
+Every component (`model`, `maximizer`, `anticipative_solver`,
+`parametric_anticipative_solver`, `train_dataset`) is generated from the benchmark by
+default and exposed as a keyword, so any of them can be overridden without dropping to
+[`train_policy!`](@ref).
+
+# Arguments
+- `dataset_size=30`: number of training samples to generate
+(ignored when `train_dataset` is given)
+- `nb_scenarios=1`: number of scenarios per instance
+(ignored when `train_dataset` is given)
+- `context_per_instance=1`: number of contexts per instance
+(ignored when `train_dataset` is given)
+- `seed=nothing`: random seed used by the default `model`, the default `train_dataset`
+and the `PerturbedAdditive` wrapper
+- `model`: statistical model wrapped in the policy
+(default: `generate_statistical_model(benchmark; seed)`)
+- `maximizer`: combinatorial oracle wrapped in the policy
+(default: `generate_maximizer(benchmark)`)
+- `anticipative_solver`: oracle that labels the pure-imitation iteration
+(default: `generate_anticipative_solver(benchmark)`)
+- `parametric_anticipative_solver`: oracle wrapped in `PerturbedAdditive` for the
+mirror-descent iterations (default: `generate_parametric_anticipative_solver(benchmark)`)
+- `train_dataset`: training data (default: `generate_dataset(benchmark, dataset_size; ...)`)
+- `epochs=10`: number of inner training epochs per iteration
+- `iterations=10`: total number of iterations
+- `κ=1.0`: scales `θ` for the perturbed solver and the `ε` given to `PerturbedAdditive`
+- `metrics::Tuple=()`: metrics passed through to the inner training algorithm
+- `verbose=false`: if true, print a progress line for each iteration
+- `imitation_start=true`: begin with a pure imitation step on anticipative solutions
+"""
+function train_policy(
+    algorithm::MirrorDescent,
+    benchmark::ExogenousStochasticBenchmark;
+    dataset_size=30,
+    nb_scenarios=1,
+    context_per_instance=1,
+    seed=nothing,
+    model=generate_statistical_model(benchmark; seed=seed),
+    maximizer=generate_maximizer(benchmark),
+    anticipative_solver=generate_anticipative_solver(benchmark),
+    parametric_anticipative_solver=generate_parametric_anticipative_solver(benchmark),
+    train_dataset=generate_dataset(
+        benchmark,
+        dataset_size;
+        nb_scenarios=nb_scenarios,
+        contexts_per_instance=context_per_instance,
+        seed=seed,
+    ),
+    epochs=10,
+    iterations=10,
+    κ=1.0,
+    metrics::Tuple=(),
+    verbose::Bool=false,
+    imitation_start::Bool=true,
+)
+    policy = DFLPolicy(model, maximizer)
+    inner = algorithm.inner_algorithm
+    # `scenario` comes in as a keyword and is passed on positionally to the oracle.
+    perturbed_solver = PerturbedAdditive(
+        (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...);
+        ε=κ * inner.ε,
+        nb_samples=inner.nb_samples,
+        seed,
+        threaded=inner.threaded,
+    )
+
+    minimize = is_minimization_problem(benchmark)
+    histories = train_policy!(
+        algorithm,
+        policy,
+        train_dataset,
+        anticipative_solver,
+        perturbed_solver;
+        epochs,
+        iterations,
+        κ,
+        metrics,
+        verbose,
+        imitation_start,
+        is_minimization=minimize,
+    )
+    return histories, policy
+end
diff --git a/test/Project.toml b/test/Project.toml
index e8eeaed..603374a 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -5,8 +5,8 @@ DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
-JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
 
@@ -16,9 +16,8 @@ DecisionFocusedLearningAlgorithms = {path = ".."}
 [compat]
 Aqua = "0.8"
 DecisionFocusedLearningAlgorithms = "0.2.0"
-DecisionFocusedLearningBenchmarks = "0.5"
+DecisionFocusedLearningBenchmarks = "0.6.1"
 Documenter = "1"
-JuliaFormatter = "2"
 MLUtils = "0.4"
 Test = "1"
 ValueHistories = "0.5"
diff --git a/test/code.jl b/test/code.jl
index 3f74eb9..75c76c1 100644
--- a/test/code.jl
+++ b/test/code.jl
@@ -1,7 +1,6 @@
 using Aqua
 using Documenter
 using JET
-using JuliaFormatter
 
 using DecisionFocusedLearningAlgorithms
 
@@ -20,12 +19,6 @@ end
     )
 end
 
-@testset "JuliaFormatter" begin
-    @test JuliaFormatter.format(
-        DecisionFocusedLearningAlgorithms; verbose=false, overwrite=false
-    )
-end
-
 @testset "Documenter" begin
     Documenter.doctest(DecisionFocusedLearningAlgorithms)
 end
diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl
new file mode 100644
index 0000000..435f0e3
--- /dev/null
+++ b/test/mirror_descent.jl
@@ -0,0 +1,161 @@
+using DecisionFocusedLearningAlgorithms
+using DecisionFocusedLearningBenchmarks
+using Test
+using ValueHistories
+using Statistics: mean
+
+# Build the `:val_obj` metric: mean objective of the policy's decisions over `val_data`.
+function _val_obj_metric(benchmark, val_data)
+    objective = DecisionFocusedLearningBenchmarks.objective_value
+    return FunctionMetric(:val_obj, val_data) do ctx, data
+        per_sample = map(data) do sample
+            (; statistical_model, maximizer) = ctx.policy
+            decision = maximizer(statistical_model(sample.x); sample.context...)
+            return Float64(objective(benchmark, sample, decision))
+        end
+        return (val_obj=mean(per_sample),)
+    end
+end
+
+@testset "MirrorDescent Training" begin
+    @testset "MirrorDescent - ContextualStochasticArgmax basic" begin
+        # Smoke test with default algorithm settings and `dataset_size=5`.
+        bench = ContextualStochasticArgmaxBenchmark()
+
+        histories, policy = train_policy(
+            MirrorDescent(), bench; dataset_size=5, epochs=2, iterations=2, seed=0
+        )
+
+        @test policy isa DFLPolicy
+        @test histories isa Vector
+        @test length(histories) == 2
+        @test all(h -> h isa MVHistory, histories)
+        @test all(h -> haskey(h, :training_loss), histories)
+    end
+
+    @testset "MirrorDescent - StochasticVehicleScheduling basic" begin
+        # Smoke test with default algorithm settings and `dataset_size=1`.
+        bench = StochasticVehicleSchedulingBenchmark()
+
+        histories, policy = train_policy(
+            MirrorDescent(), bench; dataset_size=1, epochs=2, iterations=2, seed=0
+        )
+
+        @test policy isa DFLPolicy
+        @test histories isa Vector
+        @test length(histories) == 2
+        @test all(h -> h isa MVHistory, histories)
+        @test all(h -> haskey(h, :training_loss), histories)
+    end
+
+    @testset "MirrorDescent - imitation_start=false" begin
+        # With the imitation step disabled, every iteration goes through the
+        # mirror descent loop; two iterations must still yield two histories.
+        bench = ContextualStochasticArgmaxBenchmark()
+        histories, policy = train_policy(
+            MirrorDescent(),
+            bench;
+            dataset_size=5,
+            epochs=2,
+            iterations=2,
+            seed=0,
+            imitation_start=false,
+        )
+
+        @test policy isa DFLPolicy
+        @test histories isa Vector
+        @test length(histories) == 2
+    end
+
+    @testset "MirrorDescent - performance improves over iterations" begin
+        benchmark = ContextualStochasticArgmaxBenchmark()
+        algorithm = MirrorDescent()
+
+        # Validation data is generated with its own seed, distinct from the training seed.
+        val_dataset = generate_dataset(benchmark, 100; seed=99)
+        # Same metric as in the "trained beats untrained" tests: reuse the helper defined
+        # at the top of this file instead of repeating its body here.
+        val_metric = _val_obj_metric(benchmark, val_dataset)
+
+        histories, policy = train_policy(
+            algorithm,
+            benchmark;
+            dataset_size=20,
+            epochs=3,
+            iterations=5,
+            seed=0,
+            metrics=(val_metric,),
+        )
+
+        # Guard the indexing below: one history per iteration, each with `:val_obj`.
+        @test length(histories) == 5
+        @test all(haskey(h, :val_obj) for h in histories)
+
+        # Final validation objective recorded in each iteration.
+        val_objs = [get(histories[i], :val_obj)[2][end] for i in 1:5]
+
+        # Only a coarse trend is checked: the objective after iteration 4 must exceed
+        # the one after iteration 1. Improvement at every single iteration is not
+        # asserted.
+        @test (val_objs[4] > val_objs[1])
+    end
+
+    @testset "MirrorDescent - with metrics" begin
+        # A user-supplied metric must show up in the history returned for every
+        # iteration.
+        bench = ContextualStochasticArgmaxBenchmark()
+        epoch_metric = FunctionMetric(ctx -> ctx.epoch, :epoch)
+
+        histories, _ = train_policy(
+            MirrorDescent(),
+            bench;
+            dataset_size=5,
+            epochs=2,
+            iterations=2,
+            seed=0,
+            metrics=(epoch_metric,),
+        )
+
+        @test all(h -> haskey(h, :epoch), histories)
+    end
+
+    @testset "MirrorDescent - trained beats untrained (ContextualStochasticArgmax)" begin
+        bench = ContextualStochasticArgmaxBenchmark()
+        val_metric = _val_obj_metric(bench, generate_dataset(bench, 50; seed=99))
+
+        histories, _ = train_policy(
+            MirrorDescent(),
+            bench;
+            dataset_size=20,
+            epochs=5,
+            iterations=5,
+            seed=0,
+            metrics=(val_metric,),
+        )
+
+        # First value logged in the first iteration vs. last value logged in the last one.
+        _, first_vals = get(first(histories), :val_obj)
+        _, last_vals = get(last(histories), :val_obj)
+        @test last(last_vals) > first(first_vals)
+    end
+
+    @testset "MirrorDescent - trained beats untrained (StochasticVehicleScheduling)" begin
+        bench = StochasticVehicleSchedulingBenchmark()
+        val_metric = _val_obj_metric(bench, generate_dataset(bench, 10; seed=99))
+
+        histories, _ = train_policy(
+            MirrorDescent(),
+            bench;
+            dataset_size=10,
+            epochs=5,
+            iterations=2,
+            seed=0,
+            metrics=(val_metric,),
+        )
+
+        # First value logged in the first iteration vs. last value logged in the last one.
+        _, first_vals = get(first(histories), :val_obj)
+        _, last_vals = get(last(histories), :val_obj)
+        @test last(last_vals) < first(first_vals)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 02565a1..88bbf7c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -13,4 +13,8 @@ using DecisionFocusedLearningAlgorithms
     @testset "DAgger" begin
         include("dagger.jl")
     end
+
+    @testset "MirrorDescent" begin
+        include("mirror_descent.jl")
+    end
 end