WIP: Add support for HyperHessians backend

KristofferC · KristofferC · commit 9d3de403fbed · 2025-12-03T14:18:31.000+01:00
diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
@@ -107,6 +107,7 @@ jobs:
           - FiniteDifferences
           - ForwardDiff
           - GTPSA
+          - HyperHessians
           - Mooncake
           - PolyesterForwardDiff
           - ReverseDiff
diff --git a/DifferentiationInterface/Project.toml b/DifferentiationInterface/Project.toml
@@ -14,6 +14,7 @@ Diffractor = "9f5e2b26-1114-432f-b630-d3fe2085c51c"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
 FastDifferentiation = "eb9bf01b-bf85-4b60-bf87-ee5de06c00be"
+HyperHessians = "06b494a0-c8e0-40cc-ad32-d99506a00a6c"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -38,6 +39,7 @@ DifferentiationInterfaceFastDifferentiationExt = "FastDifferentiation"
 DifferentiationInterfaceFiniteDiffExt = "FiniteDiff"
 DifferentiationInterfaceFiniteDifferencesExt = "FiniteDifferences"
 DifferentiationInterfaceForwardDiffExt = ["ForwardDiff", "DiffResults"]
+DifferentiationInterfaceHyperHessiansExt = "HyperHessians"
 DifferentiationInterfaceGPUArraysCoreExt = "GPUArraysCore"
 DifferentiationInterfaceGTPSAExt = "GTPSA"
 DifferentiationInterfaceMooncakeExt = "Mooncake"
@@ -63,6 +65,7 @@ Diffractor = "=0.2.6"
 Enzyme = "0.13.39"
 EnzymeCore = "0.8.8"
 FastDifferentiation = "0.4.3"
+HyperHessians = "0.1"
 FiniteDiff = "2.27.0"
 FiniteDifferences = "0.12.31"
 ForwardDiff = "0.10.36,1"
diff --git a/DifferentiationInterface/README.md b/DifferentiationInterface/README.md
@@ -37,6 +37,7 @@ We support the following backends defined by [ADTypes.jl](https://github.com/Sci
   - [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl)
   - [FiniteDifferences.jl](https://github.com/JuliaDiff/FiniteDifferences.jl)
   - [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl)
+  - [HyperHessians.jl](https://github.com/KristofferC/HyperHessians.jl)
   - [GTPSA.jl](https://github.com/bmad-sim/GTPSA.jl)
   - [Mooncake.jl](https://github.com/chalk-lab/Mooncake.jl)
   - [PolyesterForwardDiff.jl](https://github.com/JuliaDiff/PolyesterForwardDiff.jl)
diff --git a/DifferentiationInterface/docs/Project.toml b/DifferentiationInterface/docs/Project.toml
@@ -6,6 +6,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+HyperHessians = "06b494a0-c8e0-40cc-ad32-d99506a00a6c"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -21,6 +22,7 @@ Documenter = "1"
 DocumenterInterLinks = "1.1"
 FiniteDiff = "2.29"
 ForwardDiff = "1.2.2"
+HyperHessians = "0.1"
 PrettyTables = "3.1"
 SparseConnectivityTracer = "1.1.2"
 SparseMatrixColorings = "0.4.23"
diff --git a/DifferentiationInterface/docs/src/explanation/backends.md b/DifferentiationInterface/docs/src/explanation/backends.md
@@ -12,6 +12,7 @@ We support the following dense backend choices from [ADTypes.jl](https://github.
 - [`AutoFiniteDifferences`](@extref ADTypes.AutoFiniteDifferences)
 - [`AutoForwardDiff`](@extref ADTypes.AutoForwardDiff)
 - [`AutoGTPSA`](@extref ADTypes.AutoGTPSA)
+- [`AutoHyperHessians`](https://github.com/KristofferC/HyperHessians.jl)
 - [`AutoMooncake`](@extref ADTypes.AutoMooncake) and [`AutoMooncakeForward`](@extref ADTypes.AutoMooncake) (the latter is experimental)
 - [`AutoPolyesterForwardDiff`](@extref ADTypes.AutoPolyesterForwardDiff)
 - [`AutoReverseDiff`](@extref ADTypes.AutoReverseDiff)
@@ -32,11 +33,11 @@ In practice, many AD backends have custom implementations for high-level operato
 !!! details
 
     In the rough summary table below,
-    
+
       - ✅ means that we reuse the custom implementation from the backend;
       - ❌ means that a custom implementation doesn't exist, so we use our default fallbacks;
       - 🔀 means it's complicated or not done yet.
-    
+
     |                            | `pf` | `pb` | `der` | `grad` | `jac` | `hess` | `hvp` | `der2` |
     |:-------------------------- |:---- |:---- |:----- |:------ |:----- |:------ |:----- |:------ |
     | `AutoChainRules`           | ❌    | ✅    | ❌     | ❌      | ❌     | ❌      | ❌     | ❌      |
@@ -48,6 +49,7 @@ In practice, many AD backends have custom implementations for high-level operato
     | `AutoFiniteDifferences`    | 🔀    | ❌    | ❌     | ✅      | ✅     | ❌      | ❌     | ❌      |
     | `AutoForwardDiff`          | ✅    | ❌    | ✅     | ✅      | ✅     | ✅      | ✅     | ✅      |
     | `AutoGTPSA`                | ✅    | ❌    | ❌     | ✅      | ✅     | ✅      | ✅     | ✅      |
+    | `AutoHyperHessians`        | ❌    | ❌    | ❌     | ❌      | ❌     | ✅      | ✅     | ✅      |
     | `AutoMooncake`             | ❌    | ✅    | ❌     | ❌      | ❌     | ❌      | ❌     | ❌      |
     | `AutoMooncakeForward`      | ✅    | ❌    | ❌     | ❌      | ❌     | ❌      | ❌     | ❌      |
     | `AutoPolyesterForwardDiff` | 🔀    | ❌    | 🔀     | ✅      | ✅     | 🔀      | 🔀     | 🔀      |
@@ -69,6 +71,7 @@ Moreover, each context type is supported by a specific subset of backends:
 | `AutoFiniteDifferences`    | ✅                  | ✅               |
 | `AutoForwardDiff`          | ✅                  | ✅               |
 | `AutoGTPSA`                | ✅                  | ❌               |
+| `AutoHyperHessians`        | ✅                  | ✅               |
 | `AutoMooncake`             | ✅                  | ✅               |
 | `AutoMooncakeForward`      | ✅                  | ✅               |
 | `AutoPolyesterForwardDiff` | ✅                  | ✅               |
diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceHyperHessiansExt/DifferentiationInterfaceHyperHessiansExt.jl b/DifferentiationInterface/ext/DifferentiationInterfaceHyperHessiansExt/DifferentiationInterfaceHyperHessiansExt.jl
@@ -0,0 +1,259 @@
+module DifferentiationInterfaceHyperHessiansExt
+
+import DifferentiationInterface as DI
+import .DI: AutoHyperHessians
+using ADTypes: ForwardMode
+using HyperHessians:
+    DirectionalHVPConfig,
+    HessianConfig,
+    Chunk,
+    chunksize,
+    pickchunksize,
+    hessian,
+    hessian!,
+    hessian_gradient_value,
+    hessian_gradient_value!,
+    hessian,
+    hvp,
+    hvp!,
+    hvp_gradient_value,
+    hvp_gradient_value!
+
+## Traits (availability, in-place support, differentiation modes)
+DI.check_available(::DI.AutoHyperHessians) = true  # this method exists only once the extension is loaded
+DI.inplace_support(::DI.AutoHyperHessians) = DI.InPlaceSupported()
+DI.hvp_mode(::DI.AutoHyperHessians) = DI.ForwardOverForward()  # HVPs are declared forward-over-forward
+DI.mode(::DI.AutoHyperHessians) = ForwardMode()  # first-order mode is declared as forward
+
+chunk_from_backend(backend::DI.AutoHyperHessians, x) =
+    backend.chunksize !== nothing ? Chunk{backend.chunksize}() : Chunk(x)  # explicit size wins
+chunk_from_backend(backend::DI.AutoHyperHessians, N::Integer, ::Type{T}) where {T} =
+    backend.chunksize !== nothing ? Chunk{backend.chunksize}() : Chunk(pickchunksize(N, T), T)
+
+function DI.pick_batchsize(backend::DI.AutoHyperHessians, x::AbstractArray)
+    chunk = chunk_from_backend(backend, x)  # batch size mirrors the chunk size
+    return DI.BatchSizeSettings{chunksize(chunk)}(length(x))
+end
+
+## Second derivative (scalar input)
+
+struct HyperHessiansSecondDerivativePrep{SIG} <: DI.SecondDerivativePrep{SIG}
+    _sig::Val{SIG}  # signature returned by `DI.signature` at preparation time
+end
+
+function DI.prepare_second_derivative_nokwarg(
+        strict::Val, f, backend::DI.AutoHyperHessians, x::Number, contexts::Vararg{DI.Context, C}
+    ) where {C}
+    # Scalar inputs need no HyperHessians config: the prep only records the signature.
+    return HyperHessiansSecondDerivativePrep(DI.signature(f, backend, x, contexts...; strict))
+end
+
+# Second derivative of `f` at the scalar `x`: HyperHessians' `hessian` is applied to
+# `f` with the unwrapped context values fixed via `DI.fix_tail`.
+function DI.second_derivative(
+        f, prep::HyperHessiansSecondDerivativePrep, backend::DI.AutoHyperHessians,
+        x::Number, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    return hessian(f_fixed, x)
+end
+
+# In-place second derivative at the scalar `x`: the allocating
+# `DI.second_derivative` is evaluated and its result copied into the
+# caller-provided `der2`, which is then returned.
+function DI.second_derivative!(
+        f, der2, prep::HyperHessiansSecondDerivativePrep, backend::DI.AutoHyperHessians,
+        x::Number, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    new_der2 = DI.second_derivative(f, prep, backend, x, contexts...)
+    copyto!(der2, new_der2)
+    return der2
+end
+
+# Value, first and second derivative of `f` at the scalar `x`, all taken from a
+# single `hessian_gradient_value` call.
+function DI.value_derivative_and_second_derivative(
+        f, prep::HyperHessiansSecondDerivativePrep, backend::DI.AutoHyperHessians,
+        x::Number, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    f_fixed = DI.fix_tail(f, map(DI.unwrap, contexts)...)
+    out = hessian_gradient_value(f_fixed, x)
+    # The `gradient` and `hessian` fields are returned as first and second derivative.
+    return (out.value, out.gradient, out.hessian)
+end
+
+# In-place variant at the scalar `x`: all three quantities come from the allocating
+# `DI.value_derivative_and_second_derivative`; the derivatives are then copied into
+# the caller-provided `der` and `der2`, while the value `y` is returned as computed.
+function DI.value_derivative_and_second_derivative!(
+        f, der, der2,
+        prep::HyperHessiansSecondDerivativePrep, backend::DI.AutoHyperHessians,
+        x::Number, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    results = DI.value_derivative_and_second_derivative(f, prep, backend, x, contexts...)
+    y, new_der, new_der2 = results
+    copyto!(der, new_der)
+    copyto!(der2, new_der2)
+    return (y, der, der2)
+end
+
+## Preparation structs
+
+struct HyperHessiansHessianPrep{SIG, C} <: DI.HessianPrep{SIG}
+    _sig::Val{SIG}  # signature returned by `DI.signature` at preparation time
+    cfg::C  # `HessianConfig` built in `prepare_hessian_nokwarg`
+end
+
+struct HyperHessiansHVPPrep{SIG, C} <: DI.HVPPrep{SIG}
+    _sig::Val{SIG}  # signature returned by `DI.signature` at preparation time
+    cfg::C  # `DirectionalHVPConfig` built in `prepare_hvp_nokwarg`
+end
+
+## Hessian
+
+function DI.prepare_hessian_nokwarg(
+        strict::Val, f, backend::DI.AutoHyperHessians, x::AbstractArray, contexts::Vararg{DI.Context, C}
+    ) where {C}
+    sig = DI.signature(f, backend, x, contexts...; strict)
+    chunk = chunk_from_backend(backend, x)  # backend's chunksize, or HyperHessians' default
+    return HyperHessiansHessianPrep(sig, HessianConfig(x, chunk))
+end
+
+# Hessian of `f` at `x`, computed by HyperHessians' `hessian` with the config stored
+# in `prep` and with the unwrapped context values fixed via `DI.fix_tail`.
+function DI.hessian(
+        f, prep::HyperHessiansHessianPrep, backend::DI.AutoHyperHessians,
+        x, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    return hessian(f_fixed, x, prep.cfg)
+end
+
+# In-place Hessian: HyperHessians' `hessian!` is called on `hess` with the config
+# stored in `prep`. `hess` is returned explicitly so the result does not depend on
+# what `hessian!` itself returns (same convention as `value_gradient_and_hessian!`).
+function DI.hessian!(
+        f, hess, prep::HyperHessiansHessianPrep, backend::DI.AutoHyperHessians,
+        x, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    f_fixed = DI.fix_tail(f, map(DI.unwrap, contexts)...)
+    hessian!(hess, f_fixed, x, prep.cfg)
+    return hess
+end
+
+# Value, gradient and Hessian of `f` at `x` from one `hessian_gradient_value` call
+# that reuses the config stored in `prep`.
+function DI.value_gradient_and_hessian(
+        f, prep::HyperHessiansHessianPrep, backend::DI.AutoHyperHessians,
+        x, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    out = hessian_gradient_value(f_fixed, x, prep.cfg)
+    return (out.value, out.gradient, out.hessian)
+end
+
+# In-place variant: `hessian_gradient_value!` is given `hess` and `grad` as output
+# buffers, and its return value is passed on as the function value.
+# NOTE(review): relies on `hessian_gradient_value!` returning the primal value;
+# confirm against the HyperHessians API.
+function DI.value_gradient_and_hessian!(
+        f, grad, hess, prep::HyperHessiansHessianPrep, backend::DI.AutoHyperHessians,
+        x, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    y = hessian_gradient_value!(hess, grad, f_fixed, x, prep.cfg)
+    return (y, grad, hess)
+end
+
+## HVP
+
+function DI.prepare_hvp_nokwarg(
+        strict::Val, f, backend::DI.AutoHyperHessians, x::AbstractArray, tx::NTuple, contexts::Vararg{DI.Context, C}
+    ) where {C}
+    sig = DI.signature(f, backend, x, tx, contexts...; strict)
+    chunk = chunk_from_backend(backend, x)  # backend's chunksize, or HyperHessians' default
+    return HyperHessiansHVPPrep(sig, DirectionalHVPConfig(x, tx, chunk))
+end
+
+# Same-point HVP preparation: nothing extra is precomputed for this backend, so
+# once `DI.check_prep` has validated the arguments, the existing `prep` is handed
+# back unchanged.
+function DI.prepare_hvp_same_point(
+        f, prep::HyperHessiansHVPPrep,
+        backend::DI.AutoHyperHessians,
+        x, tx::NTuple, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, tx, contexts...)
+    return prep
+end
+
+# Hessian-vector products of `f` at `x` for the tangents `tx`, computed by
+# HyperHessians' `hvp` with the config stored in `prep`. The backend annotation is
+# written as `DI.AutoHyperHessians`, like every other method in this extension.
+function DI.hvp(
+        f, prep::HyperHessiansHVPPrep, backend::DI.AutoHyperHessians,
+        x, tx::NTuple, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, tx, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    return hvp(f_fixed, x, tx, prep.cfg)
+end
+
+# In-place HVPs: HyperHessians' `hvp!` is called on `tg` with the config stored in
+# `prep`. `tg` is returned explicitly so the result does not depend on what `hvp!`
+# itself returns (same convention as `gradient_and_hvp!`).
+function DI.hvp!(
+        f, tg::NTuple, prep::HyperHessiansHVPPrep, backend::DI.AutoHyperHessians,
+        x, tx::NTuple, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, tx, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    hvp!(tg, f_fixed, x, tx, prep.cfg)
+    return tg
+end
+
+# Gradient and Hessian-vector products of `f` at `x` for the tangents `tx`: returns
+# the `gradient` and `hvp` fields of the `hvp_gradient_value` result, which is
+# computed with the config stored in `prep`.
+function DI.gradient_and_hvp(
+        f, prep::HyperHessiansHVPPrep, backend::DI.AutoHyperHessians,
+        x, tx::NTuple, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, tx, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    out = hvp_gradient_value(f_fixed, x, tx, prep.cfg)
+    return (out.gradient, out.hvp)
+end
+
+# In-place variant: `hvp_gradient_value!` is given `tg` and `grad` as output
+# buffers together with the config stored in `prep`. Its own return value is
+# discarded; the caller-provided `grad` and `tg` are returned, in that order.
+function DI.gradient_and_hvp!(
+        f, grad, tg::NTuple,
+        prep::HyperHessiansHVPPrep, backend::DI.AutoHyperHessians,
+        x, tx::NTuple, contexts::Vararg{DI.Context, C},
+    ) where {C}
+    DI.check_prep(f, prep, backend, x, tx, contexts...)
+    constants = map(DI.unwrap, contexts)
+    f_fixed = DI.fix_tail(f, constants...)
+    hvp_gradient_value!(tg, grad, f_fixed, x, tx, prep.cfg)
+    return (grad, tg)
+end
+
+end
diff --git a/DifferentiationInterface/src/DifferentiationInterface.jl b/DifferentiationInterface/src/DifferentiationInterface.jl
@@ -64,6 +64,7 @@ include("second_order/hessian.jl")
 
 include("misc/differentiate_with.jl")
 include("misc/from_primitive.jl")
+include("misc/autohyperhessians.jl")
 include("misc/sparsity_detector.jl")
 include("misc/simple_finite_diff.jl")
 include("misc/zero_backends.jl")
@@ -122,6 +123,7 @@ export AutoReverseDiff
 export AutoSymbolics
 export AutoTracker
 export AutoZygote
+export AutoHyperHessians
 
 export AutoSparse
 
diff --git a/DifferentiationInterface/src/misc/autohyperhessians.jl b/DifferentiationInterface/src/misc/autohyperhessians.jl
@@ -0,0 +1,16 @@
+"""
+    AutoHyperHessians(; chunksize = nothing)
+
+Backend tag selecting HyperHessians.jl. Pass a positive `Int` as `chunksize` to
+override HyperHessians' chunk heuristic; the default `nothing` leaves the choice
+to HyperHessians. A non-positive `chunksize` throws an `ArgumentError`.
+"""
+struct AutoHyperHessians{CS} <: ADTypes.AbstractADType
+    chunksize::CS
+    function AutoHyperHessians(; chunksize::Union{Nothing, Int} = nothing)
+        if !isnothing(chunksize) && chunksize <= 0
+            throw(ArgumentError("chunksize must be positive, got $chunksize"))
+        end
+        return new{typeof(chunksize)}(chunksize)
+    end
+end
diff --git a/DifferentiationInterface/test/Back/HyperHessians/Project.toml b/DifferentiationInterface/test/Back/HyperHessians/Project.toml
diff --git a/DifferentiationInterface/test/Back/HyperHessians/test.jl b/DifferentiationInterface/test/Back/HyperHessians/test.jl