diff --git a/src/GPCRAnalysis.jl b/src/GPCRAnalysis.jl index 9ccf461..d618a67 100644 --- a/src/GPCRAnalysis.jl +++ b/src/GPCRAnalysis.jl @@ -52,7 +52,7 @@ export project_sequences, columnwise_entropy, align, residue_centroid, residue_c export StructAlign, residueindex, ismapped, skipnothing export BWScheme, lookupbw export aa_properties, aa_properties_zscored, aa_properties_matrix -export sidechaincentroid, scvector, inward_tm_residues, inward_ecl_residues +export sidechaincentroid, scvector, inward_tm_residues, inward_ecl_residues, pocket_pharmacophore, complementary_pharmacophore, detect_pocket_cavity export features_from_structure export forcecomponents, optimize_weights, forcedict diff --git a/src/pocket.jl b/src/pocket.jl index facceb0..9fdf116 100644 --- a/src/pocket.jl +++ b/src/pocket.jl @@ -1,5 +1,5 @@ function itpos2coord(idxpos, tmitps) - return [itp(idxpos) for itp in tmitps] + return SVector(tmitps[1](idxpos), tmitps[2](idxpos)) end function z2itpos(z, itpz::Interpolations.BSplineInterpolation{<:Any, <:Any, <:Any, <:BSpline{<:Linear}, <:Any}) @@ -28,9 +28,9 @@ function scvector(r::AbstractResidue) return sccoord === nothing ? zero(cacoord) : sccoord - cacoord end -function res_inside_hull(sccoord, tmcoords) # tmcoords is a list of 2D points in the z-plane of sccoord +function res_inside_hull(sccoord, tmcoords) # tmcoords is a list of SVector{2} points h = jarvismarch(tmcoords) - return insidehull(sccoord[1:2], h) + return insidehull(SVector(sccoord[1], sccoord[2]), h) end function tm_res_is_inward(r::AbstractResidue, itps) @@ -61,6 +61,16 @@ function ecl_res_is_inward(r::AbstractResidue, topcenter) return dot(topcenter .- accoord, scvec) > 0 end +function default_eclidxs(tmidxs) + length(tmidxs) == 7 || error("Expected 7 TM helices in tmidxs, got $(length(tmidxs))") + return ( + 1:tmidxs[1][1]-1, + tmidxs[2][end]+1:tmidxs[3][1]-1, + tmidxs[4][end]+1:tmidxs[5][1]-1, + tmidxs[6][end]+1:tmidxs[7][1]-1, + ) +end + """ inward_ecl_residues(seq, eclidxs) @@ -76,3 +86,226 @@ function inward_ecl_residues(seq::AbstractVector{<:AbstractResidue}, eclidxs; in end inward_ecl_residues(seq, eclidxs; includes_amino_terminus::Bool=false) = inward_ecl_residues(collectresidues(seq), eclidxs; includes_amino_terminus) + +function _pocket_z_range(seq::AbstractVector{<:AbstractResidue}, tmidxs) + z_max = maximum(alphacarbon_coordinates(r)[3] for tm in tmidxs for r in seq[tm]) + return 0.0 .. z_max +end + +""" + cavity = detect_pocket_cavity(seq, tmidxs; kwargs...) -> IsotropicGMM{3,Float64} + +Identify the binding pocket cavity of `seq` using a grid-based solvent-exclusion approach. + +`seq` must be aligned to the membrane (see [`align_to_membrane`](@ref)) so that z is the +membrane normal and z > 0 is extracellular. Probe positions are placed on a regular grid +within the TM helix bundle and accepted if they do not clash with any protein atom. + +Cavity detection has two stages: +1. **Geometric filter**: the probe must lie inside the 2D convex hull of the TM helix + centerlines at that z-level (computed from `tmidxs` via B-spline interpolation). +2. **Clash filter**: the probe center must be at least `probe_radius + vdW_radius` away + from every protein atom. + +All 7 TM helices should normally be passed in `tmidxs` so that the convex hull correctly +encloses the full helix bundle. + +# Keyword arguments +- `probe_radius=1.4`: solvent probe radius (Å); 1.4 approximates a water molecule +- `grid_spacing=0.25`: grid resolution (Å) +- `zi`: z-interval (Å) to search; defaults to the extracellular half of the TM bundle + (`0 .. z_max` of the TM alpha carbons) +- `ρmax=12.0`: xy radius of the search cylinder (Å) +- `σ=1.4`: Gaussian σ assigned to each probe in the returned `IsotropicGMM` + +The returned `IsotropicGMM` can be visualised with `gmmdisplay` (from +GaussianMixtureAlignment, requires a Makie backend) and used with `rocs_align` to +score steric complementarity of a ligand to the cavity. + +See also: [`pocket_pharmacophore`](@ref), [`complementary_pharmacophore`](@ref). +""" +function detect_pocket_cavity( + seq::AbstractVector{<:AbstractResidue}, + tmidxs; + probe_radius::Real = 1.4, + grid_spacing::Real = 0.25, + zi::Union{AbstractInterval, Nothing} = nothing, + ρmax::Real = 12.0, + σ::Real = 1.4, +) + zi === nothing && (zi = _pocket_z_range(seq, tmidxs)) + z_lo, z_hi = leftendpoint(zi), rightendpoint(zi) + + # Build TM centerline interpolants for the inside-bundle convex-hull test + tmcoords_mats = [alphacarbon_coordinates_matrix(seq[tm]) for tm in tmidxs] + itps = [[interpolate(tm[i,:], BSpline(i === 3 ? Linear() : Cubic())) for i=1:3] + for tm in tmcoords_mats] + + # Collect atom coords and vdW radii for atoms near the search region + buffer = probe_radius + 2.2 # ≥ max heavy-atom vdW radius + atom_coords = Vector{SVector{3,Float64}}() + atom_radii = Vector{Float64}() + for r in seq + rname = resname(r) + for a in r + c = SVector{3,Float64}(coords(a)) + (hypot(c[1], c[2]) < ρmax + buffer && + z_lo - buffer ≤ c[3] ≤ z_hi + buffer) || continue + aname = atomname(a) == "OXT" ? "O" : atomname(a) + push!(atom_coords, c) + push!(atom_radii, get(vanderwaalsradius, (rname, aname), 1.8)) + end + end + min_dist_sq = [(probe_radius + ar)^2 for ar in atom_radii] + + # Grid search: z is the outer loop so the convex hull is reused across the xy-plane + gaussians = IsotropicGaussian{3,Float64}[] + for z in range(z_lo, z_hi; step=Float64(grid_spacing)) + hull = jarvismarch(z2tmcoords(z, itps)) + for x in range(-ρmax, ρmax; step=Float64(grid_spacing)) + for y in range(-ρmax, ρmax; step=Float64(grid_spacing)) + x^2 + y^2 ≥ ρmax^2 && continue + insidehull(SVector(x, y), hull) || continue + pt = SVector{3,Float64}(x, y, z) + clash = false + for i in eachindex(atom_coords) + if sum(abs2, pt - atom_coords[i]) < min_dist_sq[i] + clash = true + break + end + end + clash && continue + push!(gaussians, IsotropicGaussian(pt, Float64(σ), 1.0)) + end + end + end + return IsotropicGMM(gaussians) +end +detect_pocket_cavity(seq::Chain, tmidxs; kwargs...) = + detect_pocket_cavity(collectresidues(seq), tmidxs; kwargs...) + +_complement_feature(f::Symbol) = + f === :PosIonizable ? :NegIonizable : + f === :NegIonizable ? :PosIonizable : + f === :Donor ? :Acceptor : + f === :Acceptor ? :Donor : f + +""" + mgmm = pocket_pharmacophore(seq, tmidxs; eclidxs=nothing, kwargs...) + +Build a pharmacophoric `IsotropicMultiGMM` for the binding pocket of `seq`. + +Inward-facing residues are identified via [`inward_tm_residues`](@ref) for the +transmembrane helices specified by `tmidxs`, and optionally via +[`inward_ecl_residues`](@ref) for the extracellular loops specified by `eclidxs`. +The pharmacophore is then built from those residues via +[`features_from_structure`](@ref); `kwargs` are forwarded to that function. + +The returned `IsotropicMultiGMM` can be visualized directly with `multigmmdisplay` +from GaussianMixtureAlignment (requires a Makie backend), and the complementary +pharmacophore for ligand docking can be obtained via +[`complementary_pharmacophore`](@ref). + +See also: [`complementary_pharmacophore`](@ref), [`inward_tm_residues`](@ref), +[`inward_ecl_residues`](@ref), [`features_from_structure`](@ref). +""" +function pocket_pharmacophore(seq::AbstractVector{<:AbstractResidue}, tmidxs; eclidxs=nothing, kwargs...) + if eclidxs === nothing + length(tmidxs) == 7 || error("Expected 7 TM helices in tmidxs, got $(length(tmidxs))") + end + tminward = inward_tm_residues(seq, tmidxs) + idxs = reduce(vcat, [tm[inward] for (tm, inward) in zip(tmidxs, tminward)]) + if eclidxs === nothing + eclidxs = default_eclidxs(tmidxs) + end + if !isempty(eclidxs) + eclinward = inward_ecl_residues(seq, eclidxs) + append!(idxs, reduce(vcat, [ecl[inward] for (ecl, inward) in zip(eclidxs, eclinward)])) + end + return features_from_structure(seq, idxs; kwargs...) +end +pocket_pharmacophore(seq::Chain, tmidxs; kwargs...) = + pocket_pharmacophore(collectresidues(seq), tmidxs; kwargs...) + +""" + cmgmm = complementary_pharmacophore(mgmm) + cmgmm = complementary_pharmacophore(mgmm, cavity; ampthreshold=0.0) + +Return the pharmacophoric complement of `mgmm`, representing the features an +ideal ligand should present to bind the pocket described by `mgmm`. + +Features are swapped according to the rules of molecular recognition: +- `:PosIonizable` (Arg/Lys on the receptor) ↔ `:NegIonizable` (anionic group on + ligand) +- `:NegIonizable` (Asp/Glu on the receptor) ↔ `:PosIonizable` (cationic group on + ligand) +- `:Donor` ↔ `:Acceptor` +- `:Hydrophobe`, `:Aromatic` are unchanged +- `:Steric` is dropped if `cavity` is supplied (which is a better + parametrization of the steric features of the pocket), otherwise unchanged. + +When called with a single argument, each complementary feature is placed at the +same position as the corresponding receptor atom. This is a useful approximation +but the resulting positions lie inside the receptor's van der Waals surface, so +a ligand placed there would clash with the receptor. + +When `cavity` (an `IsotropicGMM` from [`detect_pocket_cavity`](@ref)) is +supplied, each complementary feature is instead placed at the nearest void-space +probe position, projecting it out of the receptor into accessible space. This +form is preferred for docking with `rocs_align` or `gogma_align` (from +GaussianMixtureAlignment). + +See also: [`pocket_pharmacophore`](@ref), [`detect_pocket_cavity`](@ref). + +# Examples + +``` +julia> receptor_gmm = pocket_pharmacophore(chain, tmidxs); + +julia> cavity = detect_pocket_cavity(chain, tmidxs; zi=0.0 .. 25.0); + +julia> ligand_gmm = complementary_pharmacophore(receptor_gmm, cavity; ampthreshold=0.1); +""" +function complementary_pharmacophore(mgmm::IsotropicMultiGMM) + result = IsotropicMultiGMM(Dict{keytype(mgmm), valtype(mgmm)}()) + for (k, gmm) in mgmm + dest = get!(valtype(result), result, _complement_feature(k)) + for g in gmm + push!(dest, g) + end + end + return result +end + +function complementary_pharmacophore(mgmm::IsotropicMultiGMM, cavity::IsotropicGMM; ampthreshold::Real=0.0) + cavity_positions = [g.μ for g in cavity] + result = IsotropicMultiGMM(Dict{keytype(mgmm), valtype(mgmm)}()) + for (k, gmm) in mgmm + k === :Steric && continue + kcomp = _complement_feature(k) + dest = get!(valtype(result), result, kcomp) + @assert isempty(dest) + for g in gmm + nearest = argmin(sum(abs2, g.μ - c) for c in cavity_positions) + μp = cavity_positions[nearest] + amp = exp(-(sum(abs2, g.μ - μp) / (4 * g.σ^2))) + push!(dest, IsotropicGaussian(cavity_positions[nearest], g.σ, amp)) + end + # Consolidate by μ and σ before applying the amplitude threshold + if !isempty(dest) + g = first(dest) + samegs = Dict{Tuple{typeof(g.μ), typeof(g.σ)}, typeof(g)}() + for g in dest + gagg = get(samegs, (g.μ, g.σ), nothing) + if gagg === nothing + samegs[(g.μ, g.σ)] = g + else + samegs[(g.μ, g.σ)] = IsotropicGaussian(g.μ, g.σ, gagg.ϕ + g.ϕ) + end + end + gs = collect(Iterators.filter(g -> g.ϕ > ampthreshold, values(samegs))) + result.gmms[kcomp] = IsotropicGMM(gs) + end + end + return result +end diff --git a/test/runtests.jl b/test/runtests.jl index 8375643..cef2e08 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -296,6 +296,74 @@ using Test @test g.μ[3] ∈ 0 .. 8 end end + + # pocket_pharmacophore: should reproduce the manually-indexed result + pp = pocket_pharmacophore(opsd, opsd_tms[[2,3,5,6,7]]; eclidxs=Int[]) + @test pp isa IsotropicMultiGMM + @test keys(pp.gmms) == keys(tm_mgmm.gmms) + for k in keys(pp.gmms) + @test length(pp[k]) == length(tm_mgmm[k]) + for (g1, g2) in zip(pp[k], tm_mgmm[k]) + @test g1.μ ≈ g2.μ + @test g1.σ ≈ g2.σ + @test g1.ϕ ≈ g2.ϕ + end + end + + # pocket_pharmacophore with eclidxs: must have at least as many features + pp_ecl = pocket_pharmacophore(opsd, opsd_tms) + @test pp_ecl isa IsotropicMultiGMM + @test sum(length, values(pp_ecl.gmms)) >= sum(length, values(pp.gmms)) + + # complementary_pharmacophore: swaps ionic and H-bond features, preserves others + pp = pp_ecl + cp = complementary_pharmacophore(pp) + @test cp isa IsotropicMultiGMM + # Total number of Gaussians is preserved + @test sum(length, values(cp.gmms)) == sum(length, values(pp.gmms)) + # Ionizable and H-bond channels are swapped + for (orig, comp) in ((:PosIonizable, :NegIonizable), (:NegIonizable, :PosIonizable), + (:Donor, :Acceptor), (:Acceptor, :Donor)) + haskey(pp.gmms, orig) && @test length(cp[comp]) == length(pp[orig]) + end + # Hydrophobe/Aromatic/Steric are unchanged + for k in (:Hydrophobe, :Aromatic, :Steric) + haskey(pp.gmms, k) && @test length(cp[k]) == length(pp[k]) + end + # Double complement is identity + @test sum(length, values(complementary_pharmacophore(cp).gmms)) == sum(length, values(pp.gmms)) + + # detect_pocket_cavity: use coarse grid for speed + cavity = detect_pocket_cavity(opsd, opsd_tms; grid_spacing=1.0) + @test cavity isa IsotropicGMM + @test !isempty(cavity) + zi = GPCRAnalysis._pocket_z_range(collectresidues(opsd), opsd_tms) + for g in cavity + @test g.μ[1]^2 + g.μ[2]^2 < 12.0^2 + 0.1 # within ρmax cylinder + @test leftendpoint(zi) - 0.1 ≤ g.μ[3] ≤ rightendpoint(zi) + 0.1 # within z range + end + # Custom zi restricts search correctly + zi_small = 5.0 .. 15.0 + cavity_small = detect_pocket_cavity(opsd, opsd_tms; grid_spacing=2.0, zi=zi_small) + @test all(g -> leftendpoint(zi_small) - 0.1 ≤ g.μ[3] ≤ rightendpoint(zi_small) + 0.1, cavity_small) + @test length(cavity_small) < length(cavity) + + # complementary_pharmacophore with cavity: positions must come from the cavity + cavity_positions = Set([g.μ for g in cavity]) + cp_cavity = complementary_pharmacophore(pp, cavity) + @test cp_cavity isa IsotropicMultiGMM + for (k, gmm) in cp_cavity + for g in gmm + @test g.μ ∈ cavity_positions # every position is a valid void probe + end + end + # Feature types are still swapped + for (orig, comp) in ((:PosIonizable, :NegIonizable), (:Donor, :Acceptor)) + haskey(pp.gmms, orig) && @test length(cp_cavity[comp]) <= length(pp[orig]) + end + # Trimming low-amplitude features + cp_cavity_thresh = complementary_pharmacophore(pp, cavity; ampthreshold=0.1) + @test length(cp_cavity_thresh[:Steric]) < length(cp_cavity[:Steric]) end @testset "Forces" begin