diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 0000000..b9dfbe6 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,35 @@ +name: Format +on: + push: + branches: + - main + pull_request: + workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + format-check: + name: JuliaFormatter + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + actions: write + contents: read + steps: + - uses: actions/checkout@v7 + - uses: julia-actions/setup-julia@v3 + with: + version: '1' + - uses: julia-actions/cache@v3 + - name: Run JuliaFormatter + shell: julia --color=yes {0} + run: | + using Pkg + Pkg.activate(; temp=true) + Pkg.add(name="JuliaFormatter", version="2") + using JuliaFormatter + if !format(".", verbose=true, overwrite=false) + @error "Code is not formatted. Run `julia -e 'using JuliaFormatter; format(\".\")'` locally." + exit(1) + end diff --git a/Project.toml b/Project.toml index a4b06c5..fc60d0c 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" [compat] -DecisionFocusedLearningBenchmarks = "0.5.0, 0.6" +DecisionFocusedLearningBenchmarks = "0.6.1" DocStringExtensions = "0.9.5" Flux = "0.16.9" InferOpt = "0.7.1" diff --git a/src/DecisionFocusedLearningAlgorithms.jl b/src/DecisionFocusedLearningAlgorithms.jl index d7a6250..38d9ec6 100644 --- a/src/DecisionFocusedLearningAlgorithms.jl +++ b/src/DecisionFocusedLearningAlgorithms.jl @@ -25,6 +25,7 @@ include("algorithms/abstract_algorithm.jl") include("algorithms/supervised/fyl.jl") include("algorithms/supervised/anticipative_imitation.jl") include("algorithms/supervised/dagger.jl") +include("algorithms/mirror_descent/mirror_descent.jl") export TrainingContext @@ -41,7 +42,7 @@ export AbstractMetric, export AbstractAlgorithm, 
AbstractImitationAlgorithm export PerturbedFenchelYoungLossImitation, - DAgger, AnticipativeImitation, train_policy!, train_policy + DAgger, AnticipativeImitation, train_policy!, train_policy, MirrorDescent export AbstractPolicy, DFLPolicy end diff --git a/src/algorithms/mirror_descent/mirror_descent.jl b/src/algorithms/mirror_descent/mirror_descent.jl new file mode 100644 index 0000000..a4b65d9 --- /dev/null +++ b/src/algorithms/mirror_descent/mirror_descent.jl @@ -0,0 +1,239 @@ +""" +$TYPEDEF + +Mirror Descent algorithm for learning coordinated solutions. + +This algorithm is designed for stochastic benchmarks. + +Reference: + +# Fields +$TYPEDFIELDS +""" +@kwdef struct MirrorDescent{A<:PerturbedFenchelYoungLossImitation} <: AbstractAlgorithm + "inner imitation algorithm for supervised learning" + inner_algorithm::A = PerturbedFenchelYoungLossImitation() +end + +# Helper function to augment a dataset with anticipative solutions: returns a new collection where each sample's `y` is `anticipative_solver(sample.scenario; sample.context...)` +function _augment_with_anticipative(dataset, anticipative_solver) + return map(dataset) do sample + y = anticipative_solver(sample.scenario; sample.context...) + return DataSample(sample; y=y) + end +end + +# Helper function to create a perturbed sample: `y` is the perturbed solver's output at `κ * model(sample.x)`, sign-flipped when `is_minimization` is true +function _perturbed_sample(sample, model, perturbed_solver, is_minimization, κ) + θ = model(sample.x) + signed_θ = is_minimization ? -κ * θ : κ * θ + y = perturbed_solver(signed_θ; scenario=sample.scenario, sample.context...) 
+ return DataSample(sample; y=y) +end + +# Helper function to augment a dataset with perturbed solutions +function _augment_with_perturbed(dataset, model, perturbed_solver, is_minimization; κ=1.0) + return map(dataset) do sample + return _perturbed_sample(sample, model, perturbed_solver, is_minimization, κ) + end +end + +# Helper function to augment a dataset with perturbed solutions in-place +function _augment_with_perturbed!(dataset, model, perturbed_solver, is_minimization; κ=1.0) + for i in eachindex(dataset) + dataset[i] = _perturbed_sample( + dataset[i], model, perturbed_solver, is_minimization, κ + ) + end + return dataset +end + +# Helper function to run the mirror descent loop for a given number of iterations; returns one inner `train_policy!` result per iteration +function _mirror_descent_loop( + algorithm, + policy, + input_dataset, + perturbed_solver, + is_minimization; + md_iters, + epochs, + κ, + metrics, + verbose, +) + # Allocate the perturbed dataset once. Subsequent iterations mutate in place. + dataset = _augment_with_perturbed( + input_dataset, policy.statistical_model, perturbed_solver, is_minimization; κ + ) + return map(1:md_iters) do n_it + verbose && println("Mirror descent iteration $n_it / $md_iters") + if n_it > 1 + _augment_with_perturbed!( + dataset, policy.statistical_model, perturbed_solver, is_minimization; κ + ) + end + return train_policy!(algorithm.inner_algorithm, policy, dataset; epochs, metrics) + end +end + +""" +$TYPEDSIGNATURES + +Train a DFLPolicy using the Mirror Descent algorithm on a provided training dataset. + +When `imitation_start=true`, the first iteration is a pure imitation step using +`anticipative_solver`; subsequent iterations are the mirror descent loop using +`perturbed_anticipative_solver`. 
+ +# Arguments +- `iterations=10`: total number of mirror descent iterations (includes the imitation step +when `imitation_start=true`; in that case `iterations <= 1` runs only the imitation step) +- `epochs=10`: number of inner training epochs per mirror descent iteration +- `κ=1.0`: scaling factor applied to `θ` before passing it to the perturbed solver +- `metrics::Tuple=()`: metrics forwarded to the inner training algorithm +- `verbose=false`: if true, prints progress at each iteration +- `imitation_start=true`: if true, run a pure imitation step against the + anticipative solver as the first iteration +- `is_minimization=true`: set to false if the objective is a maximization problem +""" +function train_policy!( + algorithm::MirrorDescent, + policy::DFLPolicy, + train_dataset, + anticipative_solver, + perturbed_anticipative_solver; + epochs=10, + iterations=10, + κ=1.0, + metrics::Tuple=(), + verbose::Bool=false, + imitation_start::Bool=true, + is_minimization::Bool=true, +) + if imitation_start + verbose && println("Imitation step") + dataset = _augment_with_anticipative(train_dataset, anticipative_solver) + h_imitation = train_policy!( + algorithm.inner_algorithm, policy, dataset; epochs, metrics + ) + md_iters = iterations - 1 + md_iters >= 1 || return [h_imitation] + rest = _mirror_descent_loop( + algorithm, + policy, + dataset, + perturbed_anticipative_solver, + is_minimization; + md_iters, + epochs, + κ, + metrics, + verbose, + ) + return pushfirst!(rest, h_imitation) + end + + # imitation_start == false: all `iterations` are mirror descent iterations + return _mirror_descent_loop( + algorithm, + policy, + train_dataset, + perturbed_anticipative_solver, + is_minimization; + md_iters=iterations, + epochs, + κ, + metrics, + verbose, + ) +end + +""" +$TYPEDSIGNATURES + +Generate a dataset for the provided benchmark and train a DFLPolicy using the Mirror Descent algorithm. Returns the tuple `(histories_per_iteration, policy)`. 
+ +This high-level wrapper builds every component (`model`, `maximizer`, +`anticipative_solver`, `parametric_anticipative_solver`, `train_dataset`) from the +benchmark, each exposed as an optional keyword so callers can override any of them +without dropping to [`train_policy!`](@ref). + +# Arguments +- `dataset_size=30`: number of samples in the training dataset +(used when `train_dataset` is not provided) +- `nb_scenarios=1`: number of scenarios per instance +(used when `train_dataset` is not provided) +- `context_per_instance=1`: number of contexts per instance +(used when `train_dataset` is not provided) +- `seed=nothing`: random seed for reproducibility +(used in `model` and `train_dataset` when not provided) +- `model`: statistical model to wrap in the policy +(defaults to `generate_statistical_model(benchmark; seed)`) +- `maximizer`: combinatorial oracle to wrap in the policy +(defaults to `generate_maximizer(benchmark)`) +- `anticipative_solver`: oracle used in pure-imitation iterations +(defaults to `generate_anticipative_solver(benchmark)`) +- `parametric_anticipative_solver`: parametric oracle wrapped in `PerturbedAdditive` for +mirror-descent iterations (defaults to `generate_parametric_anticipative_solver(benchmark)`) +- `train_dataset`: training dataset (defaults to `generate_dataset(benchmark, dataset_size; ...)`) +- `epochs=10`: number of inner training epochs per mirror descent iteration +- `iterations=10`: total number of mirror descent iterations +- `κ=1.0`: scaling factor applied to `θ` before passing it to the perturbed solver +- `metrics::Tuple=()`: metrics forwarded to the inner training algorithm +- `verbose=false`: if true, prints a banner at each iteration +- `imitation_start=true`: if true, run a pure imitation step against the anticipative solver as the +first iteration +""" +function train_policy( + algorithm::MirrorDescent, + benchmark::ExogenousStochasticBenchmark; + dataset_size=30, + nb_scenarios=1, + context_per_instance=1, + 
 seed=nothing, + model=generate_statistical_model(benchmark; seed=seed), + maximizer=generate_maximizer(benchmark), + anticipative_solver=generate_anticipative_solver(benchmark), + parametric_anticipative_solver=generate_parametric_anticipative_solver(benchmark), + train_dataset=generate_dataset( + benchmark, + dataset_size; + nb_scenarios=nb_scenarios, + contexts_per_instance=context_per_instance, + seed=seed, + ), + epochs=10, + iterations=10, + κ=1.0, + metrics::Tuple=(), + verbose::Bool=false, + imitation_start::Bool=true, +) + policy = DFLPolicy(model, maximizer) + + (; nb_samples, ε, threaded) = algorithm.inner_algorithm + perturbed_anticipative_solver = PerturbedAdditive( + (θ; scenario, kwargs...) -> parametric_anticipative_solver(θ, scenario; kwargs...); + ε=κ * ε, + nb_samples=nb_samples, + seed=seed, + threaded=threaded, + ) + + histories_per_iteration = train_policy!( + algorithm, + policy, + train_dataset, + anticipative_solver, + perturbed_anticipative_solver; + epochs=epochs, + iterations=iterations, + κ=κ, + metrics=metrics, + verbose=verbose, + imitation_start=imitation_start, + is_minimization=is_minimization_problem(benchmark), + ) + + return histories_per_iteration, policy +end diff --git a/test/Project.toml b/test/Project.toml index e8eeaed..603374a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -5,8 +5,8 @@ DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" -JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7" @@ -16,9 +16,8 @@ DecisionFocusedLearningAlgorithms = {path = ".."} [compat] Aqua = "0.8" DecisionFocusedLearningAlgorithms = "0.2.0" 
-DecisionFocusedLearningBenchmarks = "0.5" +DecisionFocusedLearningBenchmarks = "0.6.1" Documenter = "1" -JuliaFormatter = "2" MLUtils = "0.4" Test = "1" ValueHistories = "0.5" diff --git a/test/code.jl b/test/code.jl index 3f74eb9..75c76c1 100644 --- a/test/code.jl +++ b/test/code.jl @@ -1,7 +1,6 @@ using Aqua using Documenter using JET -using JuliaFormatter using DecisionFocusedLearningAlgorithms @@ -20,12 +19,6 @@ end ) end -@testset "JuliaFormatter" begin - @test JuliaFormatter.format( - DecisionFocusedLearningAlgorithms; verbose=false, overwrite=false - ) -end - @testset "Documenter" begin Documenter.doctest(DecisionFocusedLearningAlgorithms) end diff --git a/test/mirror_descent.jl b/test/mirror_descent.jl new file mode 100644 index 0000000..435f0e3 --- /dev/null +++ b/test/mirror_descent.jl @@ -0,0 +1,161 @@ +using DecisionFocusedLearningAlgorithms +using DecisionFocusedLearningBenchmarks +using Test +using ValueHistories +using Statistics: mean + +function _val_obj_metric(benchmark, val_data) + return FunctionMetric(:val_obj, val_data) do ctx, data + vals = map(data) do s + θ = ctx.policy.statistical_model(s.x) + y = ctx.policy.maximizer(θ; s.context...) 
+ return Float64( + DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y) + ) + end + return (val_obj=mean(vals),) + end +end + +@testset "MirrorDescent Training" begin + @testset "MirrorDescent - ContextualStochasticArgmax basic" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, benchmark; dataset_size=5, epochs=2, iterations=2, seed=0 + ) + + @test histories isa Vector + @test length(histories) == 2 + @test all(h isa MVHistory for h in histories) + @test all(haskey(h, :training_loss) for h in histories) + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - StochasticVehicleScheduling basic" begin + benchmark = StochasticVehicleSchedulingBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, benchmark; dataset_size=1, epochs=2, iterations=2, seed=0 + ) + + @test histories isa Vector + @test length(histories) == 2 + @test all(h isa MVHistory for h in histories) + @test all(haskey(h, :training_loss) for h in histories) + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - imitation_start=false" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + histories, policy = train_policy( + algorithm, + benchmark; + dataset_size=5, + epochs=2, + iterations=2, + seed=0, + imitation_start=false, + ) + + @test histories isa Vector + @test length(histories) == 2 + @test policy isa DFLPolicy + end + + @testset "MirrorDescent - performance improves over iterations" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + val_dataset = generate_dataset(benchmark, 100; seed=99) + + val_metric = FunctionMetric(:val_obj, val_dataset) do ctx, data + vals = map(data) do s + θ = ctx.policy.statistical_model(s.x) + y = ctx.policy.maximizer(θ; s.context...) 
+ return Float64( + DecisionFocusedLearningBenchmarks.objective_value(benchmark, s, y) + ) + end + return (val_obj=mean(vals),) + end + + histories, policy = train_policy( + algorithm, + benchmark; + dataset_size=20, + epochs=3, + iterations=5, + seed=0, + metrics=(val_metric,), + ) + + val_objs = [get(histories[i], :val_obj)[2][end] for i in 1:5] + + # Last recorded validation objective of iteration 4 should exceed that of iteration 1 + @test (val_objs[4] > val_objs[1]) + end + + @testset "MirrorDescent - with metrics" begin + benchmark = ContextualStochasticArgmaxBenchmark() + algorithm = MirrorDescent() + + metrics = (FunctionMetric(ctx -> ctx.epoch, :epoch),) + + histories, policy = train_policy( + algorithm, + benchmark; + dataset_size=5, + epochs=2, + iterations=2, + seed=0, + metrics=metrics, + ) + + @test all(haskey(h, :epoch) for h in histories) + end + + @testset "MirrorDescent - trained beats untrained (ContextualStochasticArgmax)" begin + benchmark = ContextualStochasticArgmaxBenchmark() + val_data = generate_dataset(benchmark, 50; seed=99) + + histories, _ = train_policy( + MirrorDescent(), + benchmark; + dataset_size=20, + epochs=5, + iterations=5, + seed=0, + metrics=(_val_obj_metric(benchmark, val_data),), + ) + + obj_untrained = get(histories[1], :val_obj)[2][1] + obj_trained = get(histories[end], :val_obj)[2][end] + + @test obj_trained > obj_untrained + end + + @testset "MirrorDescent - trained beats untrained (StochasticVehicleScheduling)" begin + benchmark = StochasticVehicleSchedulingBenchmark() + val_data = generate_dataset(benchmark, 10; seed=99) + + histories, _ = train_policy( + MirrorDescent(), + benchmark; + dataset_size=10, + epochs=5, + iterations=2, + seed=0, + metrics=(_val_obj_metric(benchmark, val_data),), + ) + + obj_untrained = get(histories[1], :val_obj)[2][1] + obj_trained = get(histories[end], :val_obj)[2][end] + + @test obj_trained < obj_untrained + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 02565a1..88bbf7c 
100644 --- a/test/runtests.jl +++ 
b/test/runtests.jl @@ -13,4 +13,8 @@ using DecisionFocusedLearningAlgorithms @testset "DAgger" begin include("dagger.jl") end + + @testset "MirrorDescent" begin + include("mirror_descent.jl") + end end