From c21492ca40771be3b4f5b881105fb76461dbc52e Mon Sep 17 00:00:00 2001 From: sdelannoypavy Date: Fri, 29 May 2026 22:18:35 +0200 Subject: [PATCH 1/9] add mirror descent --- .../MirrorDescent/mirror_descent.jl | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/algorithms/MirrorDescent/mirror_descent.jl diff --git a/src/algorithms/MirrorDescent/mirror_descent.jl b/src/algorithms/MirrorDescent/mirror_descent.jl new file mode 100644 index 0000000..c167b7c --- /dev/null +++ b/src/algorithms/MirrorDescent/mirror_descent.jl @@ -0,0 +1,139 @@ +""" +$TYPEDEF + +Mirror Descent algorithm for learning coordinated solutions. + +This algorithm is designed for stochastic benchmarks. + +Reference: + +# Fields +$TYPEDFIELDS +""" +@kwdef struct MirrorDescent{A} <: AbstractImitationAlgorithm + "inner imitation algorithm for supervised learning" + inner_algorithm::A = PerturbedFenchelYoungLossImitation() +end + +""" +$TYPEDSIGNATURES +Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. + +# Core training method +""" + + +function train_policy( + algorithm::MirrorDescent, + benchmark::ExogenousStochasticBenchmark; + dataset_size=30, + epochs=10, + iterations=10, + κ = 1.0, + metrics::Tuple=(), + seed=nothing, +) + + train_dataset = generate_dataset(benchmark, dataset_size; seed=seed) + + # Initialize model and create policy + model = generate_statistical_model(benchmark; seed=seed) + maximizer = generate_maximizer(benchmark) + policy = DFLPolicy(model, maximizer) + + # vector because we store one history per iteration + histories_per_iteration = MVHistory[] + + anticipative_solver = generate_anticipative_solver(benchmark;) + parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;) + + # perturb = true correspond to "real" iterations of mirror descent + # we compute solutions with the penalized anticipative solver + perturbation + + # perturb = false correspond to imitation learning + # we use the anticipative solver without perturbation + # usefull to start with one iteration of pure imitation learning + perturb = false + + # Train policy + for n_it in 1:iterations + println("Iteration $n_it / $iterations") + + if n_it > 1 + perturb = true + end + + + # Generate anticipative solutions as training data + augmented_dataset = augment_dataset( + algorithm.inner_algorithm, benchmark, train_dataset, model, maximizer, anticipative_solver, parametric_anticipative_solver; + κ = κ, perturb = perturb + ) + + + # Train policy on augmented dataset + history = train_policy!( + algorithm.inner_algorithm, + policy, + augmented_dataset; + epochs = epochs, + metrics = metrics, + maximizer_kwargs=sample -> sample.context, + ) + + push!(histories_per_iteration, history) + end + + return histories_per_iteration, policy +end + + +function augment_dataset( + algorithm::PerturbedFenchelYoungLossImitation, + bench::ExogenousStochasticBenchmark, + train_dataset::AbstractArray, + model, + maximizer, + anticipative_solver, + parametric_anticipative_solver; + κ = 1.0, + perturb = false +) + + (; nb_samples, ε, threaded, training_optimizer, seed) = algorithm + + augmented_dataset = Vector{DataSample}() + + if perturb + perturbed_maximizer = PerturbedAdditive( + parametric_anticipative_solver; ε=κ*ε, nb_samples=nb_samples + ) + end + + + for sample in train_dataset + + θ = model(sample.x) + + if perturb + if is_minimization_problem(bench) + y = perturbed_maximizer(-κ*θ; scenario = sample.scenario, context = sample) + else + y = perturbed_maximizer(κ*θ; scenario = sample.scenario, context = sample) + end + else + y = anticipative_solver(sample.scenario; context = sample) + end + + augmented_datasample = DataSample(; + x = sample.x, + y, + instance = sample.context, + extra = sample.extra + ) + + push!(augmented_dataset, augmented_datasample) + end + + return augmented_dataset +end \ No newline at end of file From 2c13ca07fb8cb3bf49d3fcd5fe063b069591e5d2 Mon Sep 17 00:00:00 2001 From: sdelannoypavy Date: Fri, 29 May 2026 22:49:13 +0200 Subject: [PATCH 2/9] corrected bug --- src/DecisionFocusedLearningAlgorithms.jl | 3 ++- test.jl | 26 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test.jl diff --git a/src/DecisionFocusedLearningAlgorithms.jl b/src/DecisionFocusedLearningAlgorithms.jl index d7a6250..19fdf70 100644 --- a/src/DecisionFocusedLearningAlgorithms.jl +++ b/src/DecisionFocusedLearningAlgorithms.jl @@ -25,6 +25,7 @@ include("algorithms/abstract_algorithm.jl") include("algorithms/supervised/fyl.jl") include("algorithms/supervised/anticipative_imitation.jl") include("algorithms/supervised/dagger.jl") +include("algorithms/MirrorDescent/mirror_descent.jl") export TrainingContext @@ -41,7 +42,7 @@ export AbstractMetric, export AbstractAlgorithm, AbstractImitationAlgorithm export PerturbedFenchelYoungLossImitation, - DAgger, AnticipativeImitation, train_policy!, train_policy + DAgger, AnticipativeImitation, train_policy!, train_policy, MirrorDescent export AbstractPolicy, DFLPolicy end diff --git a/test.jl b/test.jl new file mode 100644 index 0000000..dadf851 --- /dev/null +++ b/test.jl @@ -0,0 +1,26 @@ +# To be used to visualize loss across iterations + +using DecisionFocusedLearningAlgorithms +using DecisionFocusedLearningBenchmarks + +benchmark = ContextualStochasticArgmaxBenchmark() + +anticipative_solver = generate_anticipative_solver(benchmark) +algorithm = DecisionFocusedLearningAlgorithms.MirrorDescent() + +κ = 0.1 +train_dataset_size = 5 +nb_epochs = 2 +nb_iterations = 2 +seed = 3 + +histories_r, _ = DecisionFocusedLearningAlgorithms.train_policy( + algorithm, benchmark; + dataset_size = train_dataset_size, + epochs = nb_epochs, + iterations = nb_iterations, + seed = seed, + κ = κ, +) + + From 7635f1fdeac040e3e595e30e2b42592e7c248410 Mon Sep 17 00:00:00 2001 From: sdelannoypavy Date: Fri, 29 May 2026 23:01:38 +0200 Subject: [PATCH 3/9] corrected bug --- src/algorithms/MirrorDescent/mirror_descent.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/algorithms/MirrorDescent/mirror_descent.jl b/src/algorithms/MirrorDescent/mirror_descent.jl index c167b7c..2f282e2 100644 --- a/src/algorithms/MirrorDescent/mirror_descent.jl +++ b/src/algorithms/MirrorDescent/mirror_descent.jl @@ -66,7 +66,7 @@ function train_policy( # Generate anticipative solutions as training data augmented_dataset = augment_dataset( - algorithm.inner_algorithm, benchmark, train_dataset, model, maximizer, anticipative_solver, parametric_anticipative_solver; + algorithm.inner_algorithm, benchmark, train_dataset, model, anticipative_solver, parametric_anticipative_solver; κ = κ, perturb = perturb ) @@ -90,10 +90,9 @@ end function augment_dataset( algorithm::PerturbedFenchelYoungLossImitation, - bench::ExogenousStochasticBenchmark, + bench::AbstractStochasticBenchmark, train_dataset::AbstractArray, model, - maximizer, anticipative_solver, parametric_anticipative_solver; κ = 1.0, From d9ad94d361dbedf06e111c083a4a2e0a9d45d71e Mon Sep 17 00:00:00 2001 From: sdelannoypavy Date: Fri, 29 May 2026 23:11:14 +0200 Subject: [PATCH 4/9] Delete test.jl --- test.jl | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 test.jl diff --git a/test.jl b/test.jl deleted file mode 100644 index dadf851..0000000 --- a/test.jl +++ /dev/null @@ -1,26 +0,0 @@ -# To be used to visualize loss across iterations - -using DecisionFocusedLearningAlgorithms -using DecisionFocusedLearningBenchmarks - -benchmark = ContextualStochasticArgmaxBenchmark() - -anticipative_solver = generate_anticipative_solver(benchmark) -algorithm = DecisionFocusedLearningAlgorithms.MirrorDescent() - -κ = 0.1 -train_dataset_size = 5 -nb_epochs = 2 -nb_iterations = 2 -seed = 3 - -histories_r, _ = DecisionFocusedLearningAlgorithms.train_policy( - algorithm, benchmark; - dataset_size = train_dataset_size, - epochs = nb_epochs, - iterations = nb_iterations, - seed = seed, - κ = κ, -) - - From 0f7347bc6a686db3094d6eb1b93acc87bb4092c7 Mon Sep 17 00:00:00 2001 From: sdelannoypavy Date: Mon, 8 Jun 2026 11:47:00 +0200 Subject: [PATCH 5/9] Fix review comments --- Project.toml | 14 +- .../mirror_descent/mirror_descent.jl | 196 ++++++++++++++++++ test/mirror_descent.jl | 95 +++++++++ 3 files changed, 301 insertions(+), 4 deletions(-) create mode 100644 src/algorithms/mirror_descent/mirror_descent.jl create mode 100644 test/mirror_descent.jl diff --git a/Project.toml b/Project.toml index a4b06c5..2418b61 100644 --- a/Project.toml +++ b/Project.toml @@ -1,17 +1,17 @@ name = "DecisionFocusedLearningAlgorithms" uuid = "46d52364-bc3b-4fac-a992-eb1d3ef2de15" -version = "0.2.0" authors = ["Members of JuliaDecisionFocusedLearning and contributors"] - -[workspace] -projects = ["docs", "test"] +version = "0.2.0" [deps] DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -21,12 +21,18 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" [compat] DecisionFocusedLearningBenchmarks = "0.5.0, 0.6" DocStringExtensions = "0.9.5" +Documenter = "1.17.0" Flux = "0.16.9" InferOpt = "0.7.1" +Literate = "2.21.0" MLUtils = "0.4.8" +Plots = "1.41.6" ProgressMeter = "1.11.0" Random = "1.11.0" Statistics = "1.11.1" UnicodePlots = "3.8.2" ValueHistories = "0.5.6" julia = "1.11" + +[workspace] +projects = ["docs", "test"] diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl new file mode 100644 index 0000000..b0847cb --- /dev/null +++ b/src/algorithms/mirror_descent/mirror_descent.jl @@ -0,0 +1,196 @@ +""" +$TYPEDEF + +Mirror Descent algorithm for learning coordinated solutions. + +This algorithm is designed for stochastic benchmarks. + +Reference: + +# Fields +$TYPEDFIELDS +""" +@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm + "inner imitation algorithm for supervised learning" + inner_algorithm::A = PerturbedFenchelYoungLossImitation() +end + +""" +$TYPEDSIGNATURES + +Train a DFLPolicy using the Mirror Descent algorithm on a provided training dataset. + +# Core training method + +# Arguments +- `epochs`: number of training epochs per iteration +- `iterations`: number of mirror descent iterations +- `κ`: scaling factor for the perturbation magnitude +- `metrics`: tuple of metrics to track during training +- `verbose`: if true, prints progress at each iteration +- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation) +""" + +function train_policy!( + benchmark::ExogenousStochasticBenchmark, + algorithm::MirrorDescent, + policy::DFLPolicy, + train_dataset, + anticipative_solver, + perturbed_anticipative_solver; + epochs=10, + iterations=10, + κ=1.0, + metrics::Tuple=(), + verbose::Bool=false, + imitation_start::Bool=true +) + + augmented_dataset = train_dataset + return map(1:iterations) do n_it + if verbose + println("Iteration $n_it / $iterations") + end + + perturb = n_it > 1 || !imitation_start + + augmented_dataset = augment_dataset( + benchmark, augmented_dataset, policy.statistical_model, anticipative_solver, perturbed_anticipative_solver; + κ=κ, perturb=perturb + ) + + train_policy!( + algorithm.inner_algorithm, + policy, + augmented_dataset; + epochs=epochs, + metrics=metrics, + maximizer_kwargs=sample -> sample.context, + ) + end +end + +""" +$TYPEDSIGNATURES + +Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. + +# Benchmark convenience wrapper + +This high-level function handles all setup from the benchmark and returns a trained policy. + +# Arguments +- `dataset_size`: number of samples in the training dataset +- `epochs`: number of training epochs per iteration +- `iterations`: number of mirror descent iterations +- `κ`: scaling factor for the perturbation magnitude +- `metrics`: tuple of metrics to track during training +- `seed`: random seed for reproducibility +- `verbose`: if true, prints progress at each iteration +- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation) +- `model_kwargs`: additional keyword arguments passed to `generate_statistical_model` +- `maximizer_kwargs`: additional keyword arguments passed to `generate_maximizer` +- `solver_kwargs`: additional keyword arguments passed to `generate_anticipative_solver` and `generate_parametric_anticipative_solver` +- `nb_scenarios`: number of scenarios per instance. +- `context_per_instance`: number of contexts per instance. +""" + + + +function train_policy( + algorithm::MirrorDescent, + benchmark::ExogenousStochasticBenchmark; + dataset_size=30, + epochs=10, + iterations=10, + κ=1.0, + metrics::Tuple=(), + seed=nothing, + verbose::Bool=false, + imitation_start::Bool=true, + model_kwargs=(;), + maximizer_kwargs=(;), + solver_kwargs=(;), + nb_scenarios = 1, + context_per_instance = 1, +) + train_dataset = generate_dataset(benchmark, dataset_size; nb_scenarios=nb_scenarios, contexts_per_instance=context_per_instance, seed=seed) + + model = generate_statistical_model(benchmark; seed=seed, model_kwargs...) + maximizer = generate_maximizer(benchmark; maximizer_kwargs...) + policy = DFLPolicy(model, maximizer) + + anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...) + parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark; solver_kwargs...) + (; nb_samples, ε, threaded, seed) = algorithm.inner_algorithm + perturbed_anticipative_solver = PerturbedAdditive((θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); ε=κ*ε, nb_samples=nb_samples, seed=seed, threaded=threaded) + + + histories_per_iteration = train_policy!( + benchmark, algorithm, policy, train_dataset, anticipative_solver, perturbed_anticipative_solver; + epochs=epochs, iterations=iterations, κ=κ, metrics=metrics, verbose=verbose, imitation_start=imitation_start + ) + + return histories_per_iteration, policy +end + +function augment_dataset( + bench::ExogenousStochasticBenchmark, + train_dataset::AbstractArray, + model, + anticipative_solver, + perturbed_anticipative_solver; + κ=1.0, + perturb=false +) + return _augment_dataset( + Val(fieldtype(eltype(train_dataset), :y) !== Nothing), + bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; + κ=κ, perturb=perturb + ) +end + +# Raw dataset (samples have no y) → create new DataSamples +function _augment_dataset( + ::Val{false}, + bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; + κ=1.0, perturb=false +) + return map(train_dataset) do sample + θ = model(sample.x) + if perturb + if is_minimization_problem(bench) + y = perturbed_anticipative_solver(-κ*θ; scenario=sample.scenario, sample.context...) + else + y = perturbed_anticipative_solver(κ*θ; scenario=sample.scenario, sample.context...) + end + else + y = anticipative_solver(sample.scenario; sample.context...) + end + DataSample(sample; y=y) + end +end + +# Augmented dataset (samples already have y) → update y in place +function _augment_dataset( + ::Val{true}, + bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; + κ=1.0, perturb=false +) + for (i, sample) in enumerate(train_dataset) + θ = model(sample.x) + if perturb + if is_minimization_problem(bench) + y = perturbed_anticipative_solver(-κ*θ; scenario=sample.scenario, sample.context...) + else + y = perturbed_anticipative_solver(κ*θ; scenario=sample.scenario, sample.context...) + end + else + y = anticipative_solver(sample.scenario; sample.context...) + end + ET = eltype(sample.y) + y_converted = convert(typeof(sample.y), ET <: Integer ? round.(ET, y) : y) + train_dataset[i] = DataSample(sample; y=y_converted) + end + return train_dataset +end \ No newline at end of file diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl new file mode 100644 index 0000000..0a42cc3 --- /dev/null +++ b/test/mirror_descent.jl @@ -0,0 +1,95 @@ +using DecisionFocusedLearningAlgorithms +using DecisionFocusedLearningBenchmarks +using Test +using ValueHistories +using Statistics: mean + +@testset "MirrorDescent Training" begin + + @testset "MirrorDescent - ContextualStochasticArgmax basic" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, benchmark; + dataset_size=5, epochs=2, iterations=2, seed=0 + ) + + @test histories isa Vector + @test length(histories) == 2 + @test all(h isa MVHistory for h in histories) + @test all(haskey(h, :training_loss) for h in histories) + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - StochasticVehicleScheduling basic" begin + benchmark = StochasticVehicleSchedulingBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, benchmark; + dataset_size=1, epochs=2, iterations=2, seed=0 + ) + + @test histories isa Vector + @test length(histories) == 2 + @test all(h isa MVHistory for h in histories) + @test all(haskey(h, :training_loss) for h in histories) + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - imitation_start=false" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, benchmark; + dataset_size=5, epochs=2, iterations=2, seed=0, imitation_start=false + ) + + @test histories isa Vector + @test length(histories) == 2 + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - performance improves over iterations" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + val_dataset = generate_dataset(benchmark, 100; seed=99) + + val_metric = FunctionMetric(:val_obj, val_dataset) do ctx, data + vals = map(data) do s + θ = ctx.policy.statistical_model(s.x) + y = ctx.policy.maximizer(θ; s.context...) + Float64(DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y)) + end + (val_obj = mean(vals),) + end + + histories, policy = train_policy( + algorithm, benchmark; + dataset_size=20, epochs=3, iterations=5, seed=0, metrics=(val_metric,) + ) + + val_objs = [get(histories[i], :val_obj)[2][end] for i in 1:5] + + # Performance should improve at each iteration + @test (val_objs[4] > val_objs[1]) + end + + @testset "MirrorDescent - with metrics" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + metrics = (FunctionMetric(ctx -> ctx.epoch, :epoch),) + + histories, policy = train_policy( + algorithm, benchmark; + dataset_size=5, epochs=2, iterations=2, seed=0, metrics=metrics + ) + + @test all(haskey(h, :epoch) for h in histories) + end + +end From 5da6cc99525bec91c0f3a20a75ed513d7e9aa328 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 19 Jun 2026 17:57:42 +0200 Subject: [PATCH 6/9] style: formatting + cleanup useless dependencies --- Project.toml | 12 +-- .../MirrorDescent/mirror_descent.jl | 47 ++++---- .../mirror_descent/mirror_descent.jl | 100 +++++++++++++----- test/mirror_descent.jl | 37 ++++--- 4 files changed, 123 insertions(+), 73 deletions(-) diff --git a/Project.toml b/Project.toml index 2418b61..1a71616 100644 --- a/Project.toml +++ b/Project.toml @@ -1,15 +1,16 @@ name = "DecisionFocusedLearningAlgorithms" uuid = "46d52364-bc3b-4fac-a992-eb1d3ef2de15" -authors = ["Members of JuliaDecisionFocusedLearning and contributors"] version = "0.2.0" +authors = ["Members of JuliaDecisionFocusedLearning and contributors"] + +[workspace] +projects = ["docs", "test"] [deps] DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" -Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" @@ -21,10 +22,8 @@ ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" [compat] DecisionFocusedLearningBenchmarks = "0.5.0, 0.6" DocStringExtensions = "0.9.5" -Documenter = "1.17.0" Flux = "0.16.9" InferOpt = "0.7.1" -Literate = "2.21.0" MLUtils = "0.4.8" Plots = "1.41.6" ProgressMeter = "1.11.0" @@ -33,6 +32,3 @@ Statistics = "1.11.1" UnicodePlots = "3.8.2" ValueHistories = "0.5.6" julia = "1.11" - -[workspace] -projects = ["docs", "test"] diff --git a/src/algorithms/MirrorDescent/mirror_descent.jl b/src/algorithms/MirrorDescent/mirror_descent.jl index 2f282e2..3f5ee0c 100644 --- a/src/algorithms/MirrorDescent/mirror_descent.jl +++ b/src/algorithms/MirrorDescent/mirror_descent.jl @@ -22,18 +22,16 @@ Generate a dataset for the provided benchmark and train a DFLPolicy using the Mi # Core training method """ - function train_policy( algorithm::MirrorDescent, benchmark::ExogenousStochasticBenchmark; dataset_size=30, epochs=10, iterations=10, - κ = 1.0, + κ=1.0, metrics::Tuple=(), seed=nothing, ) - train_dataset = generate_dataset(benchmark, dataset_size; seed=seed) # Initialize model and create policy @@ -44,8 +42,8 @@ function train_policy( # vector because we store one history per iteration histories_per_iteration = MVHistory[] - anticipative_solver = generate_anticipative_solver(benchmark;) - parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;) + anticipative_solver = generate_anticipative_solver(benchmark;) + parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;) # perturb = true correspond to "real" iterations of mirror descent # we compute solutions with the penalized anticipative solver + perturbation @@ -63,21 +61,25 @@ function train_policy( perturb = true end - # Generate anticipative solutions as training data augmented_dataset = augment_dataset( - algorithm.inner_algorithm, benchmark, train_dataset, model, anticipative_solver, parametric_anticipative_solver; - κ = κ, perturb = perturb + algorithm.inner_algorithm, + benchmark, + train_dataset, + model, + anticipative_solver, + parametric_anticipative_solver; + κ=κ, + perturb=perturb, ) - # Train policy on augmented dataset history = train_policy!( algorithm.inner_algorithm, policy, augmented_dataset; - epochs = epochs, - metrics = metrics, + epochs=epochs, + metrics=metrics, maximizer_kwargs=sample -> sample.context, ) @@ -87,7 +89,6 @@ function train_policy( return histories_per_iteration, policy end - function augment_dataset( algorithm::PerturbedFenchelYoungLossImitation, bench::AbstractStochasticBenchmark, @@ -95,44 +96,38 @@ function augment_dataset( model, anticipative_solver, parametric_anticipative_solver; - κ = 1.0, - perturb = false + κ=1.0, + perturb=false, ) - (; nb_samples, ε, threaded, training_optimizer, seed) = algorithm augmented_dataset = Vector{DataSample}() if perturb perturbed_maximizer = PerturbedAdditive( - parametric_anticipative_solver; ε=κ*ε, nb_samples=nb_samples + parametric_anticipative_solver; ε=κ * ε, nb_samples=nb_samples ) end - for sample in train_dataset - θ = model(sample.x) if perturb if is_minimization_problem(bench) - y = perturbed_maximizer(-κ*θ; scenario = sample.scenario, context = sample) + y = perturbed_maximizer(-κ * θ; scenario=sample.scenario, context=sample) else - y = perturbed_maximizer(κ*θ; scenario = sample.scenario, context = sample) + y = perturbed_maximizer(κ * θ; scenario=sample.scenario, context=sample) end else - y = anticipative_solver(sample.scenario; context = sample) + y = anticipative_solver(sample.scenario; context=sample) end augmented_datasample = DataSample(; - x = sample.x, - y, - instance = sample.context, - extra = sample.extra + x=sample.x, y, instance=sample.context, extra=sample.extra ) push!(augmented_dataset, augmented_datasample) end return augmented_dataset -end \ No newline at end of file +end diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl index b0847cb..d773c85 100644 --- a/src/algorithms/mirror_descent/mirror_descent.jl +++ b/src/algorithms/mirror_descent/mirror_descent.jl @@ -43,9 +43,8 @@ function train_policy!( κ=1.0, metrics::Tuple=(), verbose::Bool=false, - imitation_start::Bool=true + imitation_start::Bool=true, ) - augmented_dataset = train_dataset return map(1:iterations) do n_it if verbose @@ -55,8 +54,13 @@ function train_policy!( perturb = n_it > 1 || !imitation_start augmented_dataset = augment_dataset( - benchmark, augmented_dataset, policy.statistical_model, anticipative_solver, perturbed_anticipative_solver; - κ=κ, perturb=perturb + benchmark, + augmented_dataset, + policy.statistical_model, + anticipative_solver, + perturbed_anticipative_solver; + κ=κ, + perturb=perturb, ) train_policy!( @@ -95,8 +99,6 @@ This high-level function handles all setup from the benchmark and returns a trai - `context_per_instance`: number of contexts per instance. """ - - function train_policy( algorithm::MirrorDescent, benchmark::ExogenousStochasticBenchmark; @@ -111,24 +113,47 @@ function train_policy( model_kwargs=(;), maximizer_kwargs=(;), solver_kwargs=(;), - nb_scenarios = 1, - context_per_instance = 1, + nb_scenarios=1, + context_per_instance=1, ) - train_dataset = generate_dataset(benchmark, dataset_size; nb_scenarios=nb_scenarios, contexts_per_instance=context_per_instance, seed=seed) + train_dataset = generate_dataset( + benchmark, + dataset_size; + nb_scenarios=nb_scenarios, + contexts_per_instance=context_per_instance, + seed=seed, + ) model = generate_statistical_model(benchmark; seed=seed, model_kwargs...) maximizer = generate_maximizer(benchmark; maximizer_kwargs...) policy = DFLPolicy(model, maximizer) anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...) - parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark; solver_kwargs...) + parametric_anticipative_solver = generate_parametric_anticipative_solver( + benchmark; solver_kwargs... + ) (; nb_samples, ε, threaded, seed) = algorithm.inner_algorithm - perturbed_anticipative_solver = PerturbedAdditive((θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); ε=κ*ε, nb_samples=nb_samples, seed=seed, threaded=threaded) - + perturbed_anticipative_solver = PerturbedAdditive( + (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); + ε=κ * ε, + nb_samples=nb_samples, + seed=seed, + threaded=threaded, + ) histories_per_iteration = train_policy!( - benchmark, algorithm, policy, train_dataset, anticipative_solver, perturbed_anticipative_solver; - epochs=epochs, iterations=iterations, κ=κ, metrics=metrics, verbose=verbose, imitation_start=imitation_start + benchmark, + algorithm, + policy, + train_dataset, + anticipative_solver, + perturbed_anticipative_solver; + epochs=epochs, + iterations=iterations, + κ=κ, + metrics=metrics, + verbose=verbose, + imitation_start=imitation_start, ) return histories_per_iteration, policy @@ -141,28 +166,42 @@ function augment_dataset( anticipative_solver, perturbed_anticipative_solver; κ=1.0, - perturb=false + perturb=false, ) return _augment_dataset( Val(fieldtype(eltype(train_dataset), :y) !== Nothing), - bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; - κ=κ, perturb=perturb + bench, + train_dataset, + model, + anticipative_solver, + perturbed_anticipative_solver; + κ=κ, + perturb=perturb, ) end # Raw dataset (samples have no y) → create new DataSamples function _augment_dataset( ::Val{false}, - bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; - κ=1.0, perturb=false + bench, + train_dataset, + model, + anticipative_solver, + perturbed_anticipative_solver; + κ=1.0, + perturb=false, ) return map(train_dataset) do sample θ = model(sample.x) if perturb if is_minimization_problem(bench) - y = perturbed_anticipative_solver(-κ*θ; scenario=sample.scenario, sample.context...) + y = perturbed_anticipative_solver( + -κ * θ; scenario=sample.scenario, sample.context... + ) else - y = perturbed_anticipative_solver(κ*θ; scenario=sample.scenario, sample.context...) + y = perturbed_anticipative_solver( + κ * θ; scenario=sample.scenario, sample.context... + ) end else y = anticipative_solver(sample.scenario; sample.context...) @@ -174,16 +213,25 @@ end # Augmented dataset (samples already have y) → update y in place function _augment_dataset( ::Val{true}, - bench, train_dataset, model, anticipative_solver, perturbed_anticipative_solver; - κ=1.0, perturb=false + bench, + train_dataset, + model, + anticipative_solver, + perturbed_anticipative_solver; + κ=1.0, + perturb=false, ) for (i, sample) in enumerate(train_dataset) θ = model(sample.x) if perturb if is_minimization_problem(bench) - y = perturbed_anticipative_solver(-κ*θ; scenario=sample.scenario, sample.context...) + y = perturbed_anticipative_solver( + -κ * θ; scenario=sample.scenario, sample.context... + ) else - y = perturbed_anticipative_solver(κ*θ; scenario=sample.scenario, sample.context...) + y = perturbed_anticipative_solver( + κ * θ; scenario=sample.scenario, sample.context... + ) end else y = anticipative_solver(sample.scenario; sample.context...) @@ -193,4 +241,4 @@ function _augment_dataset( train_dataset[i] = DataSample(sample; y=y_converted) end return train_dataset -end \ No newline at end of file +end diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl index 0a42cc3..3373c64 100644 --- a/test/mirror_descent.jl +++ b/test/mirror_descent.jl @@ -5,14 +5,12 @@ using ValueHistories using Statistics: mean @testset "MirrorDescent Training" begin - @testset "MirrorDescent - ContextualStochasticArgmax basic" begin benchmark = ContextualStochasticArgmaxBenchmark() algorithm = MirrorDescent() histories, policy = train_policy( - algorithm, benchmark; - dataset_size=5, epochs=2, iterations=2, seed=0 + algorithm, benchmark; dataset_size=5, epochs=2, iterations=2, seed=0 ) @test histories isa Vector @@ -27,8 +25,7 @@ using Statistics: mean algorithm = MirrorDescent() histories, policy = train_policy( - algorithm, benchmark; - dataset_size=1, epochs=2, iterations=2, seed=0 + algorithm, benchmark; dataset_size=1, epochs=2, iterations=2, seed=0 ) @test histories isa Vector @@ -43,8 +40,13 @@ using Statistics: mean algorithm = MirrorDescent() histories, policy = train_policy( - algorithm, benchmark; - dataset_size=5, epochs=2, iterations=2, seed=0, imitation_start=false + algorithm, + benchmark; + dataset_size=5, + epochs=2, + iterations=2, + seed=0, + imitation_start=false, ) @test histories isa Vector @@ -64,12 +66,17 @@ using Statistics: mean y = ctx.policy.maximizer(θ; s.context...) Float64(DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y)) end - (val_obj = mean(vals),) + (val_obj=mean(vals),) end histories, policy = train_policy( - algorithm, benchmark; - dataset_size=20, epochs=3, iterations=5, seed=0, metrics=(val_metric,) + algorithm, + benchmark; + dataset_size=20, + epochs=3, + iterations=5, + seed=0, + metrics=(val_metric,), ) val_objs = [get(histories[i], :val_obj)[2][end] for i in 1:5] @@ -85,11 +92,15 @@ using Statistics: mean metrics = (FunctionMetric(ctx -> ctx.epoch, :epoch),) histories, policy = train_policy( - algorithm, benchmark; - dataset_size=5, epochs=2, iterations=2, seed=0, metrics=metrics + algorithm, + benchmark; + dataset_size=5, + epochs=2, + iterations=2, + seed=0, + metrics=metrics, ) @test all(haskey(h, :epoch) for h in histories) end - end From e3d6cca027ec1bcb10a2eff1a91a2b784479a0fe Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Sun, 21 Jun 2026 03:02:22 +0200 Subject: [PATCH 7/9] refactor: reorganize the code and fix failing tests --- Project.toml | 4 +- src/DecisionFocusedLearningAlgorithms.jl | 2 +- .../MirrorDescent/mirror_descent.jl | 133 -------- .../mirror_descent/mirror_descent.jl | 305 +++++++++--------- test/Project.toml | 1 - test/mirror_descent.jl | 6 +- 6 files changed, 156 insertions(+), 295 deletions(-) delete mode 100644 src/algorithms/MirrorDescent/mirror_descent.jl diff --git a/Project.toml b/Project.toml index 1a71616..fc60d0c 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,6 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -20,12 +19,11 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" [compat] -DecisionFocusedLearningBenchmarks = "0.5.0, 0.6" +DecisionFocusedLearningBenchmarks = "0.6.1" DocStringExtensions = "0.9.5" Flux = "0.16.9" InferOpt = "0.7.1" MLUtils = "0.4.8" -Plots = "1.41.6" ProgressMeter = "1.11.0" Random = "1.11.0" Statistics = "1.11.1" diff --git a/src/DecisionFocusedLearningAlgorithms.jl b/src/DecisionFocusedLearningAlgorithms.jl index 19fdf70..38d9ec6 100644 --- a/src/DecisionFocusedLearningAlgorithms.jl +++ b/src/DecisionFocusedLearningAlgorithms.jl @@ -25,7 +25,7 @@ include("algorithms/abstract_algorithm.jl") include("algorithms/supervised/fyl.jl") include("algorithms/supervised/anticipative_imitation.jl") include("algorithms/supervised/dagger.jl") -include("algorithms/MirrorDescent/mirror_descent.jl") +include("algorithms/mirror_descent/mirror_descent.jl") export TrainingContext diff --git a/src/algorithms/MirrorDescent/mirror_descent.jl b/src/algorithms/MirrorDescent/mirror_descent.jl deleted file mode 100644 index 3f5ee0c..0000000 --- a/src/algorithms/MirrorDescent/mirror_descent.jl +++ /dev/null @@ -1,133 +0,0 @@ -""" -$TYPEDEF - -Mirror Descent algorithm for learning coordinated solutions. - -This algorithm is designed for stochastic benchmarks. - -Reference: - -# Fields -$TYPEDFIELDS -""" -@kwdef struct MirrorDescent{A} <: AbstractImitationAlgorithm - "inner imitation algorithm for supervised learning" - inner_algorithm::A = PerturbedFenchelYoungLossImitation() -end - -""" -$TYPEDSIGNATURES -Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. - -# Core training method -""" - -function train_policy( - algorithm::MirrorDescent, - benchmark::ExogenousStochasticBenchmark; - dataset_size=30, - epochs=10, - iterations=10, - κ=1.0, - metrics::Tuple=(), - seed=nothing, -) - train_dataset = generate_dataset(benchmark, dataset_size; seed=seed) - - # Initialize model and create policy - model = generate_statistical_model(benchmark; seed=seed) - maximizer = generate_maximizer(benchmark) - policy = DFLPolicy(model, maximizer) - - # vector because we store one history per iteration - histories_per_iteration = MVHistory[] - - anticipative_solver = generate_anticipative_solver(benchmark;) - parametric_anticipative_solver = generate_parametric_anticipative_solver(benchmark;) - - # perturb = true correspond to "real" iterations of mirror descent - # we compute solutions with the penalized anticipative solver + perturbation - - # perturb = false correspond to imitation learning - # we use the anticipative solver without perturbation - # usefull to start with one iteration of pure imitation learning - perturb = false - - # Train policy - for n_it in 1:iterations - println("Iteration $n_it / $iterations") - - if n_it > 1 - perturb = true - end - - # Generate anticipative solutions as training data - augmented_dataset = augment_dataset( - algorithm.inner_algorithm, - benchmark, - train_dataset, - model, - anticipative_solver, - parametric_anticipative_solver; - κ=κ, - perturb=perturb, - ) - - # Train policy on augmented dataset - history = train_policy!( - algorithm.inner_algorithm, - policy, - augmented_dataset; - epochs=epochs, - metrics=metrics, - maximizer_kwargs=sample -> sample.context, - ) - - push!(histories_per_iteration, history) - end - - return histories_per_iteration, policy -end - -function augment_dataset( - algorithm::PerturbedFenchelYoungLossImitation, - bench::AbstractStochasticBenchmark, - train_dataset::AbstractArray, - model, - anticipative_solver, - parametric_anticipative_solver; - κ=1.0, - perturb=false, -) - (; nb_samples, ε, threaded, training_optimizer, seed) = algorithm - - augmented_dataset = Vector{DataSample}() - - if perturb - perturbed_maximizer = PerturbedAdditive( - parametric_anticipative_solver; ε=κ * ε, nb_samples=nb_samples - ) - end - - for sample in train_dataset - θ = model(sample.x) - - if perturb - if is_minimization_problem(bench) - y = perturbed_maximizer(-κ * θ; scenario=sample.scenario, context=sample) - else - y = perturbed_maximizer(κ * θ; scenario=sample.scenario, context=sample) - end - else - y = anticipative_solver(sample.scenario; context=sample) - end - - augmented_datasample = DataSample(; - x=sample.x, y, instance=sample.context, extra=sample.extra - ) - - push!(augmented_dataset, augmented_datasample) - end - - return augmented_dataset -end diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl index d773c85..a4b65d9 100644 --- a/src/algorithms/mirror_descent/mirror_descent.jl +++ b/src/algorithms/mirror_descent/mirror_descent.jl @@ -15,24 +15,88 @@ $TYPEDFIELDS inner_algorithm::A = PerturbedFenchelYoungLossImitation() end +# Helper function to augment a dataset with anticipative solutions +function _augment_with_anticipative(dataset, anticipative_solver) + return map(dataset) do sample + y = anticipative_solver(sample.scenario; sample.context...) + return DataSample(sample; y=y) + end +end + +# Helper function to create a perturbed sample +function _perturbed_sample(sample, model, perturbed_solver, is_minimization, κ) + θ = model(sample.x) + signed_θ = is_minimization ? -κ * θ : κ * θ + y = perturbed_solver(signed_θ; scenario=sample.scenario, sample.context...) + return DataSample(sample; y=y) +end + +# Helper function to augment a dataset with perturbed solutions +function _augment_with_perturbed(dataset, model, perturbed_solver, is_minimization; κ=1.0) + return map(dataset) do sample + return _perturbed_sample(sample, model, perturbed_solver, is_minimization, κ) + end +end + +# Helper function to augment a dataset with perturbed solutions in-place +function _augment_with_perturbed!(dataset, model, perturbed_solver, is_minimization; κ=1.0) + for i in eachindex(dataset) + dataset[i] = _perturbed_sample( + dataset[i], model, perturbed_solver, is_minimization, κ + ) + end + return dataset +end + +# Helper function to run the mirror descent loop for a given number of iterations +function _mirror_descent_loop( + algorithm, + policy, + input_dataset, + perturbed_solver, + is_minimization; + md_iters, + epochs, + κ, + metrics, + verbose, +) + # Allocate the perturbed dataset once. Subsequent iterations mutate in place. + dataset = _augment_with_perturbed( + input_dataset, policy.statistical_model, perturbed_solver, is_minimization; κ + ) + return map(1:md_iters) do n_it + verbose && println("Mirror descent iteration $n_it / $md_iters") + if n_it > 1 + _augment_with_perturbed!( + dataset, policy.statistical_model, perturbed_solver, is_minimization; κ + ) + end + return train_policy!(algorithm.inner_algorithm, policy, dataset; epochs, metrics) + end +end + """ $TYPEDSIGNATURES Train a DFLPolicy using the Mirror Descent algorithm on a provided training dataset. -# Core training method +When `imitation_start=true`, the first iteration is a pure imitation step using +`anticipative_solver`; subsequent iterations are the mirror descent loop using +`perturbed_anticipative_solver`. # Arguments -- `epochs`: number of training epochs per iteration -- `iterations`: number of mirror descent iterations -- `κ`: scaling factor for the perturbation magnitude -- `metrics`: tuple of metrics to track during training -- `verbose`: if true, prints progress at each iteration -- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation) +- `iterations=10`: total number of mirror descent iterations (includes the imitation step +when `imitation_start=true`) +- `epochs=10`: number of inner training epochs per mirror descent iteration +- `κ=1.0`: scaling factor applied to `θ` before passing it to the perturbed solver +- `metrics::Tuple=()`: metrics forwarded to the inner training algorithm +- `verbose=false`: if true, prints progress at each iteration +- `imitation_start=true`: if true, run a pure imitation step against the + anticipative solver as the first iteration +- `is_minimization=true`: set to false if the objective is a maximization problem """ - function train_policy!( - benchmark::ExogenousStochasticBenchmark, algorithm::MirrorDescent, policy::DFLPolicy, train_dataset, @@ -44,34 +108,44 @@ function train_policy!( metrics::Tuple=(), verbose::Bool=false, imitation_start::Bool=true, + is_minimization::Bool=true, ) - augmented_dataset = train_dataset - return map(1:iterations) do n_it - if verbose - println("Iteration $n_it / $iterations") - end - - perturb = n_it > 1 || !imitation_start - - augmented_dataset = augment_dataset( - benchmark, - augmented_dataset, - policy.statistical_model, - anticipative_solver, - perturbed_anticipative_solver; - κ=κ, - perturb=perturb, + if imitation_start + verbose && println("Imitation step") + dataset = _augment_with_anticipative(train_dataset, anticipative_solver) + h_imitation = train_policy!( + algorithm.inner_algorithm, policy, dataset; epochs, metrics ) - - train_policy!( - algorithm.inner_algorithm, + md_iters = iterations - 1 + md_iters >= 1 || return [h_imitation] + rest = _mirror_descent_loop( + algorithm, policy, - augmented_dataset; - epochs=epochs, - metrics=metrics, - maximizer_kwargs=sample -> sample.context, + dataset, + perturbed_anticipative_solver, + is_minimization; + md_iters, + epochs, + κ, + metrics, + verbose, ) + return pushfirst!(rest, h_imitation) end + + # else + return _mirror_descent_loop( + algorithm, + policy, + train_dataset, + perturbed_anticipative_solver, + is_minimization; + md_iters=iterations, + epochs, + κ, + metrics, + verbose, + ) end """ @@ -79,60 +153,65 @@ $TYPEDSIGNATURES Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. -# Benchmark convenience wrapper - -This high-level function handles all setup from the benchmark and returns a trained policy. +This high-level wrapper builds every component (`model`, `maximizer`, +`anticipative_solver`, `parametric_anticipative_solver`, `train_dataset`) from the +benchmark, each exposed as an optional keyword so callers can override any of them +without dropping to [`train_policy!`](@ref). # Arguments -- `dataset_size`: number of samples in the training dataset -- `epochs`: number of training epochs per iteration -- `iterations`: number of mirror descent iterations -- `κ`: scaling factor for the perturbation magnitude -- `metrics`: tuple of metrics to track during training -- `seed`: random seed for reproducibility -- `verbose`: if true, prints progress at each iteration -- `imitation_start`: if true, the first iteration uses pure imitation learning (no perturbation) -- `model_kwargs`: additional keyword arguments passed to `generate_statistical_model` -- `maximizer_kwargs`: additional keyword arguments passed to `generate_maximizer` -- `solver_kwargs`: additional keyword arguments passed to `generate_anticipative_solver` and `generate_parametric_anticipative_solver` -- `nb_scenarios`: number of scenarios per instance. -- `context_per_instance`: number of contexts per instance. +- `dataset_size=30`: number of samples in the training dataset +(used when `train_dataset` is not provided) +- `nb_scenarios=1`: number of scenarios per instance +(used when `train_dataset` is not provided) +- `context_per_instance=1`: number of contexts per instance +(used when `train_dataset` is not provided) +- `seed=nothing`: random seed for reproducibility +(used in `model` and `train_dataset` when not provided) +- `model`: statistical model to wrap in the policy +(defaults to `generate_statistical_model(benchmark; seed)`) +- `maximizer`: combinatorial oracle to wrap in the policy +(defaults to `generate_maximizer(benchmark)`) +- `anticipative_solver`: oracle used in pure-imitation iterations +(defaults to `generate_anticipative_solver(benchmark)`) +- `parametric_anticipative_solver`: parametric oracle wrapped in `PerturbedAdditive` for +mirror-descent iterations (defaults to `generate_parametric_anticipative_solver(benchmark)`) +- `train_dataset`: training dataset (defaults to `generate_dataset(benchmark, dataset_size; ...)`) +- `epochs=10`: number of inner training epochs per mirror descent iteration +- `iterations=10`: total number of mirror descent iterations +- `κ=1.0`: scaling factor applied to `θ` before passing it to the perturbed solver +- `metrics::Tuple=()`: metrics forwarded to the inner training algorithm +- `verbose=false`: if true, prints a banner at each iteration +- `imitation_start=true`: if true, run a pure imitation step against the anticipative solver as the +first iteration """ - function train_policy( algorithm::MirrorDescent, benchmark::ExogenousStochasticBenchmark; dataset_size=30, - epochs=10, - iterations=10, - κ=1.0, - metrics::Tuple=(), - seed=nothing, - verbose::Bool=false, - imitation_start::Bool=true, - model_kwargs=(;), - maximizer_kwargs=(;), - solver_kwargs=(;), nb_scenarios=1, context_per_instance=1, -) - train_dataset = generate_dataset( + seed=nothing, + model=generate_statistical_model(benchmark; seed=seed), + maximizer=generate_maximizer(benchmark), + anticipative_solver=generate_anticipative_solver(benchmark), + parametric_anticipative_solver=generate_parametric_anticipative_solver(benchmark), + train_dataset=generate_dataset( benchmark, dataset_size; nb_scenarios=nb_scenarios, contexts_per_instance=context_per_instance, seed=seed, - ) - - model = generate_statistical_model(benchmark; seed=seed, model_kwargs...) - maximizer = generate_maximizer(benchmark; maximizer_kwargs...) + ), + epochs=10, + iterations=10, + κ=1.0, + metrics::Tuple=(), + verbose::Bool=false, + imitation_start::Bool=true, +) policy = DFLPolicy(model, maximizer) - anticipative_solver = generate_anticipative_solver(benchmark; solver_kwargs...) - parametric_anticipative_solver = generate_parametric_anticipative_solver( - benchmark; solver_kwargs... - ) - (; nb_samples, ε, threaded, seed) = algorithm.inner_algorithm + (; nb_samples, ε, threaded) = algorithm.inner_algorithm perturbed_anticipative_solver = PerturbedAdditive( (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); ε=κ * ε, @@ -142,7 +221,6 @@ function train_policy( ) histories_per_iteration = train_policy!( - benchmark, algorithm, policy, train_dataset, @@ -154,91 +232,8 @@ function train_policy( metrics=metrics, verbose=verbose, imitation_start=imitation_start, + is_minimization=is_minimization_problem(benchmark), ) return histories_per_iteration, policy end - -function augment_dataset( - bench::ExogenousStochasticBenchmark, - train_dataset::AbstractArray, - model, - anticipative_solver, - perturbed_anticipative_solver; - κ=1.0, - perturb=false, -) - return _augment_dataset( - Val(fieldtype(eltype(train_dataset), :y) !== Nothing), - bench, - train_dataset, - model, - anticipative_solver, - perturbed_anticipative_solver; - κ=κ, - perturb=perturb, - ) -end - -# Raw dataset (samples have no y) → create new DataSamples -function _augment_dataset( - ::Val{false}, - bench, - train_dataset, - model, - anticipative_solver, - perturbed_anticipative_solver; - κ=1.0, - perturb=false, -) - return map(train_dataset) do sample - θ = model(sample.x) - if perturb - if is_minimization_problem(bench) - y = perturbed_anticipative_solver( - -κ * θ; scenario=sample.scenario, sample.context... - ) - else - y = perturbed_anticipative_solver( - κ * θ; scenario=sample.scenario, sample.context... - ) - end - else - y = anticipative_solver(sample.scenario; sample.context...) - end - DataSample(sample; y=y) - end -end - -# Augmented dataset (samples already have y) → update y in place -function _augment_dataset( - ::Val{true}, - bench, - train_dataset, - model, - anticipative_solver, - perturbed_anticipative_solver; - κ=1.0, - perturb=false, -) - for (i, sample) in enumerate(train_dataset) - θ = model(sample.x) - if perturb - if is_minimization_problem(bench) - y = perturbed_anticipative_solver( - -κ * θ; scenario=sample.scenario, sample.context... - ) - else - y = perturbed_anticipative_solver( - κ * θ; scenario=sample.scenario, sample.context... - ) - end - else - y = anticipative_solver(sample.scenario; sample.context...) - end - ET = eltype(sample.y) - y_converted = convert(typeof(sample.y), ET <: Integer ? round.(ET, y) : y) - train_dataset[i] = DataSample(sample; y=y_converted) - end - return train_dataset -end diff --git a/test/Project.toml b/test/Project.toml index 6940310..b596e5f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -15,7 +15,6 @@ DecisionFocusedLearningAlgorithms = {path = ".."} [compat] Aqua = "0.8" -DecisionFocusedLearningBenchmarks = "0.5" Documenter = "1" JuliaFormatter = "2" MLUtils = "0.4" diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl index 3373c64..171eb1d 100644 --- a/test/mirror_descent.jl +++ b/test/mirror_descent.jl @@ -64,9 +64,11 @@ using Statistics: mean vals = map(data) do s θ = ctx.policy.statistical_model(s.x) y = ctx.policy.maximizer(θ; s.context...) - Float64(DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y)) + return Float64( + DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y) + ) end - (val_obj=mean(vals),) + return (val_obj=mean(vals),) end histories, policy = train_policy( From 521ae9d326ce8d86cfa0a2f516b2a369345ff647 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Sun, 21 Jun 2026 11:38:41 +0200 Subject: [PATCH 8/9] test: connect and improve tests --- test/Project.toml | 1 + test/mirror_descent.jl | 53 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 4 ++++ 3 files changed, 58 insertions(+) diff --git a/test/Project.toml b/test/Project.toml index b596e5f..261fc39 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -7,6 +7,7 @@ InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl index 171eb1d..435f0e3 100644 --- a/test/mirror_descent.jl +++ b/test/mirror_descent.jl @@ -4,6 +4,19 @@ using Test using ValueHistories using Statistics: mean +function _val_obj_metric(benchmark, val_data) + return FunctionMetric(:val_obj, val_data) do ctx, data + vals = map(data) do s + θ = ctx.policy.statistical_model(s.x) + y = ctx.policy.maximizer(θ; s.context...) + return Float64( + DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y) + ) + end + return (val_obj=mean(vals),) + end +end + @testset "MirrorDescent Training" begin @testset "MirrorDescent - ContextualStochasticArgmax basic" begin benchmark = ContextualStochasticArgmaxBenchmark() @@ -105,4 +118,44 @@ using Statistics: mean @test all(haskey(h, :epoch) for h in histories) end + + @testset "MirrorDescent - trained beats untrained (ContextualStochasticArgmax)" begin + benchmark = ContextualStochasticArgmaxBenchmark() + val_data = generate_dataset(benchmark, 50; seed=99) + + histories, _ = train_policy( + MirrorDescent(), + benchmark; + dataset_size=20, + epochs=5, + iterations=5, + seed=0, + metrics=(_val_obj_metric(benchmark, val_data),), + ) + + obj_untrained = get(histories[1], :val_obj)[2][1] + obj_trained = get(histories[end], :val_obj)[2][end] + + @test obj_trained > obj_untrained + end + + @testset "MirrorDescent - trained beats untrained (StochasticVehicleScheduling)" begin + benchmark = StochasticVehicleSchedulingBenchmark() + val_data = generate_dataset(benchmark, 10; seed=99) + + histories, _ = train_policy( + MirrorDescent(), + benchmark; + dataset_size=10, + epochs=5, + iterations=2, + seed=0, + metrics=(_val_obj_metric(benchmark, val_data),), + ) + + obj_untrained = get(histories[1], :val_obj)[2][1] + obj_trained = get(histories[end], :val_obj)[2][end] + + @test obj_trained < obj_untrained + end end diff --git a/test/runtests.jl b/test/runtests.jl index 02565a1..88bbf7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,4 +13,8 @@ using DecisionFocusedLearningAlgorithms @testset "DAgger" begin include("dagger.jl") end + + @testset "MirrorDescent" begin + include("mirror_descent.jl") + end end From 502c76e6057a29b317a12536d1f58af0ff745227 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Sun, 21 Jun 2026 12:08:16 +0200 Subject: [PATCH 9/9] ci: move formatting test to its own ci job, and only run it on latest julia version --- .github/workflows/Format.yml | 35 +++++++++++++++++++++++++++++++++++ test/Project.toml | 2 -- test/code.jl | 7 ------- 3 files changed, 35 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/Format.yml diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 0000000..b9dfbe6 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,35 @@ +name: Format +on: + push: + branches: + - main + pull_request: + workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + format-check: + name: JuliaFormatter + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + actions: write + contents: read + steps: + - uses: actions/checkout@v7 + - uses: julia-actions/setup-julia@v3 + with: + version: '1' + - uses: julia-actions/cache@v3 + - name: Run JuliaFormatter + shell: julia --color=yes {0} + run: | + using Pkg + Pkg.activate(; temp=true) + Pkg.add(name="JuliaFormatter", version="2") + using JuliaFormatter + if !format(".", verbose=true, overwrite=false) + @error "Code is not formatted. Run `julia -e 'using JuliaFormatter; format(\".\")'` locally." + exit(1) + end diff --git a/test/Project.toml b/test/Project.toml index 0a6a919..603374a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -5,7 +5,6 @@ DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" -JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -19,7 +18,6 @@ Aqua = "0.8" DecisionFocusedLearningAlgorithms = "0.2.0" DecisionFocusedLearningBenchmarks = "0.6.1" Documenter = "1" -JuliaFormatter = "2" MLUtils = "0.4" Test = "1" ValueHistories = "0.5" diff --git a/test/code.jl b/test/code.jl index 3f74eb9..75c76c1 100644 --- a/test/code.jl +++ b/test/code.jl @@ -1,7 +1,6 @@ using Aqua using Documenter using JET -using JuliaFormatter using DecisionFocusedLearningAlgorithms @@ -20,12 +19,6 @@ end ) end -@testset "JuliaFormatter" begin - @test JuliaFormatter.format( - DecisionFocusedLearningAlgorithms; verbose=false, overwrite=false - ) -end - @testset "Documenter" begin Documenter.doctest(DecisionFocusedLearningAlgorithms) end