diff --git a/.env.local.example b/.env.local.example index 03c5b67c8..79e417a23 100644 --- a/.env.local.example +++ b/.env.local.example @@ -15,6 +15,13 @@ # Default: C:\Tools\mailpit #MAILPIT_INSTALL_DIR=C:\Tools\mailpit +# Hangfire Scheduler (Enabled by Default) +# Master toggle for the background scheduler. Defaults to true; the app +# registers Hangfire and starts the background server. Set to false to +# disable the scheduler in this environment. Hangfire's tables live in +# the VIPER database under the HangFire schema. +#Hangfire__Enabled=false + # Jenkins Build Trigger (pre-push hook) # Get your API token from Jenkins: User menu > Configure > API Token #JENKINS_USER=your-username diff --git a/VueApp/src/layouts/LeftNav.vue b/VueApp/src/layouts/LeftNav.vue index 6812390c5..c1b615f83 100644 --- a/VueApp/src/layouts/LeftNav.vue +++ b/VueApp/src/layouts/LeftNav.vue @@ -60,8 +60,8 @@ clickable v-ripple :href="menuItem.menuItemUrl" - target="_blank" - rel="noopener noreferrer" + :target="menuItem.isExternalSite ? '_blank' : undefined" + :rel="menuItem.isExternalSite ? 'noopener noreferrer' : undefined" :class="menuItem.displayClass" > @@ -162,6 +162,22 @@ function isItemActive(routeTo: string | null): boolean { return score > 0 && score === bestMatchScore.value } +// True when the URL resolves to a real SPA route. Vue Router's catch-all +// (path: "/:catchAll(.*)*" etc.) matches anything not otherwise registered, +// so a successful resolve isn't enough — we also reject paths that match +// only via a regex catch-all segment. 
+function isInSpaRoute(url: string): boolean {
+    try {
+        // Judge by the leaf record only: when the catch-all is nested under a
+        // layout route, the parent record matches too and would mask the 404.
+        const records = router.resolve(url).matched
+        const leaf = records[records.length - 1]
+        return leaf !== undefined && !/\(\.\*\)/.test(leaf.path)
+    } catch {
+        return false
+    }
+}
+
 type OverflowTitleElement = HTMLElement & {
     _overflowTitleObserver?: ResizeObserver
 }
@@ -252,17 +268,29 @@ async function getLeftNav() {
                 }
             }
 
-            let routeToUrl = null
+            // Resolve to either an in-SPA route (RouterLink, client-side nav)
+            // or a same-tab anchor (full page load). URLs that don't match
+            // any registered SPA route fall through to the catch-all 404 if
+            // RouterLink-handled, so render those as plain anchors instead.
+            let routeToUrl: string | null = null
+            let internalAnchorUrl: string | undefined = undefined
             if (!isExternalUrl && r.menuItemURL.length > 0) {
-                if (isRelativeUrl && props.navarea && props.nav) {
-                    routeToUrl = `/${props.nav.toUpperCase()}/${r.menuItemURL}`
+                const candidate =
+                    isRelativeUrl && props.navarea && props.nav
+                        ? `/${props.nav.toUpperCase()}/${r.menuItemURL}`
+                        : r.menuItemURL
+                if (isInSpaRoute(candidate)) {
+                    routeToUrl = candidate
                 } else {
-                    routeToUrl = r.menuItemURL
+                    // Plain-anchor hrefs need the SPA's base path prepended
+                    // (e.g. `/2/`) since the browser won't apply Vue Router's
+                    // base for direct hrefs. router.resolve handles this.
+                    internalAnchorUrl = router.resolve(candidate).href
                 }
             }
 
             return {
-                menuItemUrl: isExternalUrl ? r.menuItemURL : undefined,
+                menuItemUrl: isExternalUrl ? 
r.menuItemURL : internalAnchorUrl, routeTo: routeToUrl, menuItemText: r.menuItemText, clickable: r.menuItemURL.length > 0, diff --git a/package-lock.json b/package-lock.json index 55249be4b..d8182f1b5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2665,9 +2665,9 @@ "license": "MIT" }, "node_modules/fast-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", - "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", "dev": true, "funding": [ { diff --git a/test/HealthChecks/HangfireHealthCheckTests.cs b/test/HealthChecks/HangfireHealthCheckTests.cs new file mode 100644 index 000000000..9662c3e45 --- /dev/null +++ b/test/HealthChecks/HangfireHealthCheckTests.cs @@ -0,0 +1,166 @@ +using Microsoft.Extensions.Diagnostics.HealthChecks; +using NSubstitute; +using Viper.Classes.HealthChecks; + +namespace Viper.test.HealthChecks +{ + public class HangfireHealthCheckTests + { + private static HealthCheckContext CreateContext(HangfireHealthCheck sut) + { + return new HealthCheckContext + { + Registration = new HealthCheckRegistration("hangfire", sut, null, null) + }; + } + + private static (Hangfire.JobStorage storage, Hangfire.Storage.IMonitoringApi monitoring) CreateStorage() + { + var monitoring = Substitute.For(); + var storage = Substitute.For(); + storage.GetMonitoringApi().Returns(monitoring); + return (storage, monitoring); + } + + private static Hangfire.Storage.Monitoring.StatisticsDto SampleStats(long servers = 1) => new() + { + Servers = servers, + Enqueued = 2, + Scheduled = 3, + Processing = 4, + Failed = 5, + Recurring = 6 + }; + + [Fact] + public async Task CheckHealthAsync_HealthyWhenServersHaveRecentHeartbeats() + { + var (storage, 
monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(SampleStats()); + monitoring.Servers().Returns(new List + { + new() { Name = "srv-1", Heartbeat = DateTime.UtcNow } + }); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Healthy, result.Status); + Assert.Contains("Hangfire OK", result.Description); + Assert.Equal(1L, result.Data["servers"]); + } + + [Fact] + public async Task CheckHealthAsync_DegradedWhenNoServersRegistered() + { + var (storage, monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(SampleStats(0)); + monitoring.Servers().Returns(new List()); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Degraded, result.Status); + Assert.Contains("no servers registered", result.Description); + } + + [Fact] + public async Task CheckHealthAsync_UnhealthyWhenAllHeartbeatsStale() + { + var (storage, monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(SampleStats()); + monitoring.Servers().Returns(new List + { + new() { Name = "srv-stale", Heartbeat = DateTime.UtcNow.AddMinutes(-10) } + }); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Unhealthy, result.Status); + Assert.Contains("stale", result.Description); + } + + [Fact] + public async Task CheckHealthAsync_UnhealthyWhenServerHasNullHeartbeat() + { + var (storage, monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(SampleStats()); + monitoring.Servers().Returns(new List + { + new() { Name = "srv-never", Heartbeat = null } + }); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Unhealthy, result.Status); + Assert.Contains("never", result.Description); + } + + [Fact] + public async Task 
CheckHealthAsync_HealthyWhenAtLeastOneHeartbeatIsRecent() + { + var (storage, monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(SampleStats(2)); + monitoring.Servers().Returns(new List + { + new() { Name = "srv-stale", Heartbeat = DateTime.UtcNow.AddMinutes(-10) }, + new() { Name = "srv-fresh", Heartbeat = DateTime.UtcNow } + }); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Healthy, result.Status); + Assert.Contains("Hangfire OK", result.Description); + } + + [Fact] + public async Task CheckHealthAsync_UnhealthyWhenStorageThrows() + { + var storage = Substitute.For(); + var boom = new InvalidOperationException("boom"); + storage.GetMonitoringApi().Returns(_ => throw boom); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Equal(HealthStatus.Unhealthy, result.Status); + Assert.Contains("unreachable", result.Description); + Assert.Same(boom, result.Exception); + } + + [Fact] + public async Task CheckHealthAsync_DataDictionaryContainsAllStatsKeys() + { + var (storage, monitoring) = CreateStorage(); + monitoring.GetStatistics().Returns(new Hangfire.Storage.Monitoring.StatisticsDto + { + Servers = 1, + Enqueued = 7, + Scheduled = 8, + Processing = 9, + Failed = 10, + Recurring = 11 + }); + monitoring.Servers().Returns(new List + { + new() { Name = "srv-1", Heartbeat = DateTime.UtcNow } + }); + + var sut = new HangfireHealthCheck(storage); + var result = await sut.CheckHealthAsync(CreateContext(sut)); + + Assert.Contains("servers", result.Data.Keys); + Assert.Contains("enqueued", result.Data.Keys); + Assert.Contains("scheduled", result.Data.Keys); + Assert.Contains("processing", result.Data.Keys); + Assert.Contains("failed", result.Data.Keys); + Assert.Contains("recurring", result.Data.Keys); + Assert.Equal(1L, result.Data["servers"]); + Assert.Equal(7L, result.Data["enqueued"]); + 
Assert.Equal(11L, result.Data["recurring"]);
+        }
+    }
+}
diff --git a/test/RAPS/RapsRoleRefreshScheduledJobTests.cs b/test/RAPS/RapsRoleRefreshScheduledJobTests.cs
new file mode 100644
index 000000000..366107d99
--- /dev/null
+++ b/test/RAPS/RapsRoleRefreshScheduledJobTests.cs
@@ -0,0 +1,20 @@
+using System.Reflection;
+using Viper.Areas.RAPS.Jobs;
+using Viper.Areas.Scheduler.Services;
+
+namespace Viper.test.RAPS
+{
+    public sealed class RapsRoleRefreshScheduledJobTests
+    {
+        [Fact]
+        public void Class_IsDecoratedWithScheduledJob()
+        {
+            var attr = typeof(RapsRoleRefreshScheduledJob).GetCustomAttribute<ScheduledJobAttribute>();
+            // The attribute must exist and carry the expected id, cron, and time zone.
+            Assert.NotNull(attr);
+            Assert.Equal("raps:role-refresh", attr.Id);
+            Assert.Equal("0 0 * * *", attr.Cron);
+            Assert.Equal("Pacific Standard Time", attr.TimeZoneId);
+        }
+    }
+}
diff --git a/test/Scheduler/HangfireJobLoggingFilterTests.cs b/test/Scheduler/HangfireJobLoggingFilterTests.cs
new file mode 100644
index 000000000..0bcf1dc05
--- /dev/null
+++ b/test/Scheduler/HangfireJobLoggingFilterTests.cs
@@ -0,0 +1,172 @@
+using Microsoft.Extensions.Logging;
+using NSubstitute;
+using NSubstitute.Core;
+using Viper.Classes.Scheduler;
+
+namespace Viper.test.Scheduler
+{
+    public class HangfireJobLoggingFilterTests
+    {
+        // Concrete method target for Hangfire.Common.Job — needs a real MethodInfo.
+        private static class FakeJob
+        {
+            public static void Run(string s, int i)
+            {
+                _ = s;
+                _ = i;
+            }
+        }
+
+        private static Hangfire.Server.PerformContext BuildPerformContext(
+            string? 
recurringJobId = null, + string jobId = "123") + { + var method = typeof(FakeJob).GetMethod(nameof(FakeJob.Run))!; + var job = new Hangfire.Common.Job(typeof(FakeJob), method, new object[] { "arg1", 42 }); + var backgroundJob = new Hangfire.BackgroundJob(jobId, job, DateTime.UtcNow); + + var storage = Substitute.For(); + var connection = Substitute.For(); + // Hangfire JSON-deserializes parameter values via SerializationHelper, so the mock + // must return a JSON-encoded string (or null), not a raw value. + var encodedParam = recurringJobId is null + ? null + : Hangfire.Common.SerializationHelper.Serialize(recurringJobId); + connection.GetJobParameter(jobId, "RecurringJobId").Returns(encodedParam); + + var cancel = Substitute.For(); + return new Hangfire.Server.PerformContext(storage, connection, backgroundJob, cancel); + } + + private static IDisposable CaptureScope(ILogger logger, out List> captured) + { + var list = new List>(); + captured = list; + var scope = Substitute.For(); + logger.BeginScope(Arg.Do>(d => list.Add(d))).Returns(scope); + return scope; + } + + // ILogger.Log is generic; the TState passed by extension methods is an internal + // struct (FormattedLogValues) which makes Arg.Any() unreliable. Inspecting the + // recorded calls directly avoids the generic-arg matching problem. + private static IReadOnlyList GetLogCalls(ILogger logger, LogLevel level) + { + return logger.ReceivedCalls() + .Where(c => c.GetMethodInfo().Name == nameof(ILogger.Log) + && c.GetArguments().Length >= 1 + && c.GetArguments()[0] is LogLevel l + && l == level) + .ToList(); + } + + private static string GetMessage(ICall call) + { + // Args layout: [LogLevel, EventId, TState, Exception, Func] + var args = call.GetArguments(); + return args[2]?.ToString() ?? string.Empty; + } + + private static Exception? 
GetException(ICall call) + { + return call.GetArguments()[3] as Exception; + } + + [Fact] + public void OnPerforming_LogsStartMessageWithSanitizedJobInfo() + { + var logger = Substitute.For>(); + CaptureScope(logger, out _); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(); + var performing = new Hangfire.Server.PerformingContext(perf); + + sut.OnPerforming(performing); + + var infoCalls = GetLogCalls(logger, LogLevel.Information); + Assert.Single(infoCalls); + Assert.Contains("Hangfire job starting", GetMessage(infoCalls[0])); + } + + [Fact] + public void OnPerforming_BeginsScopeWithJobMetadata() + { + var logger = Substitute.For>(); + CaptureScope(logger, out var captured); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(recurringJobId: "daily-cleanup"); + var performing = new Hangfire.Server.PerformingContext(perf); + + sut.OnPerforming(performing); + + logger.Received(1).BeginScope(Arg.Any>()); + Assert.Single(captured); + var dict = captured[0]; + Assert.Contains("jobId", dict.Keys); + Assert.Contains("recurringJobId", dict.Keys); + } + + [Fact] + public void OnPerformed_LogsCompletionWhenNoException() + { + var logger = Substitute.For>(); + CaptureScope(logger, out _); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(); + var performed = new Hangfire.Server.PerformedContext(perf, result: null, canceled: false, exception: null); + + sut.OnPerformed(performed); + + var infoCalls = GetLogCalls(logger, LogLevel.Information); + Assert.Single(infoCalls); + Assert.Contains("Hangfire job completed", GetMessage(infoCalls[0])); + } + + [Fact] + public void OnPerformed_LogsErrorWhenExceptionPresent() + { + var logger = Substitute.For>(); + CaptureScope(logger, out _); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(); + var failure = new InvalidOperationException("kaboom"); + var performed = new 
Hangfire.Server.PerformedContext(perf, result: null, canceled: false, exception: failure); + + sut.OnPerformed(performed); + + var errorCalls = GetLogCalls(logger, LogLevel.Error); + Assert.Single(errorCalls); + Assert.Same(failure, GetException(errorCalls[0])); + } + + [Fact] + public void OnPerformed_DisposesScopeStashedByOnPerforming() + { + var logger = Substitute.For>(); + var scope = CaptureScope(logger, out _); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(); + var performing = new Hangfire.Server.PerformingContext(perf); + var performed = new Hangfire.Server.PerformedContext(perf, result: null, canceled: false, exception: null); + + sut.OnPerforming(performing); + sut.OnPerformed(performed); + + scope.Received(1).Dispose(); + } + + [Fact] + public void OnPerformed_DoesNotThrowWhenScopeMissing() + { + var logger = Substitute.For>(); + CaptureScope(logger, out _); + var sut = new HangfireJobLoggingFilter(logger); + var perf = BuildPerformContext(); + var performed = new Hangfire.Server.PerformedContext(perf, result: null, canceled: false, exception: null); + + // No prior OnPerforming, so context.Items has no scope key. 
+ var ex = Record.Exception(() => sut.OnPerformed(performed)); + Assert.Null(ex); + } + } +} diff --git a/test/Scheduler/ScheduledJobDiscoveryTests.cs b/test/Scheduler/ScheduledJobDiscoveryTests.cs new file mode 100644 index 000000000..9cb9659a9 --- /dev/null +++ b/test/Scheduler/ScheduledJobDiscoveryTests.cs @@ -0,0 +1,76 @@ +using Microsoft.Extensions.DependencyInjection; +using Viper.Areas.Scheduler.Models; +using Viper.Areas.Scheduler.Services; + +namespace Viper.test.Scheduler +{ + public sealed class ScheduledJobDiscoveryTests + { + [ScheduledJob(id: "test:good", cron: "0 0 * * *")] + public sealed class GoodJob : IScheduledJob + { + public Task RunAsync(ScheduledJobContext context, CancellationToken ct) => Task.CompletedTask; + } + + [ScheduledJob(id: "test:second", cron: "0 1 * * *")] + public sealed class SecondJob : IScheduledJob + { + public Task RunAsync(ScheduledJobContext context, CancellationToken ct) => Task.CompletedTask; + } + + public class JobMissingAttribute : IScheduledJob + { + public Task RunAsync(ScheduledJobContext context, CancellationToken ct) => Task.CompletedTask; + } + + [Fact] + public void RegisterScheduledJobs_DiscoversValidJobs() + { + var services = new ServiceCollection(); + var found = ManualDiscover(services, new[] { typeof(GoodJob), typeof(SecondJob) }); + + Assert.Equal(2, found.Count); + Assert.Contains(found, m => m.Id == "test:good"); + Assert.Contains(found, m => m.Id == "test:second"); + } + + [Fact] + public void RegisterScheduledJobs_ThrowsWhenAttributeMissing() + { + var services = new ServiceCollection(); + var ex = Assert.Throws(() => + ManualDiscover(services, new[] { typeof(JobMissingAttribute) })); + Assert.Contains("[ScheduledJob]", ex.Message); + } + + [Fact] + public void RegisterScheduledJobs_ThrowsOnDuplicateId() + { + var services = new ServiceCollection(); + var ex = Assert.Throws(() => + ManualDiscover(services, new[] { typeof(GoodJob), typeof(DuplicateIdJob) })); + Assert.Contains("Duplicate", 
ex.Message); + } + + [ScheduledJob(id: "test:good", cron: "0 0 * * *")] + public sealed class DuplicateIdJob : IScheduledJob + { + public Task RunAsync(ScheduledJobContext context, CancellationToken ct) => Task.CompletedTask; + } + + private static IReadOnlyList ManualDiscover( + IServiceCollection services, + Type[] types) + { + var stub = new StubAssembly(types); + return ScheduledJobDiscovery.RegisterScheduledJobs(services, new[] { stub }); + } + + private sealed class StubAssembly : System.Reflection.Assembly + { + private readonly Type[] _types; + public StubAssembly(Type[] types) { _types = types; } + public override Type[] GetTypes() => _types; + } + } +} diff --git a/test/Scheduler/ScheduledJobRunnerTests.cs b/test/Scheduler/ScheduledJobRunnerTests.cs new file mode 100644 index 000000000..b850d9c44 --- /dev/null +++ b/test/Scheduler/ScheduledJobRunnerTests.cs @@ -0,0 +1,70 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using NSubstitute; +using Viper.Areas.Scheduler.Models; +using Viper.Areas.Scheduler.Services; + +namespace Viper.test.Scheduler +{ + public sealed class ScheduledJobRunnerTests + { + // Captures the ScheduledJobContext the runner hands the job, so tests + // can assert that ModBy is set correctly. + private sealed class CapturingJob : IScheduledJob + { + public ScheduledJobContext? 
CapturedContext { get; private set; } + public bool Ran { get; private set; } + + public Task RunAsync(ScheduledJobContext context, CancellationToken ct) + { + CapturedContext = context; + Ran = true; + return Task.CompletedTask; + } + } + + [Fact] + public async Task RunAsync_ResolvesJobByIdAndStampsSchedulerActor() + { + var capturing = new CapturingJob(); + var services = new ServiceCollection(); + services.AddSingleton(capturing); + + var registry = new ScheduledJobRegistry(new Dictionary + { + ["test:capturing"] = new ScheduledJobMetadata( + typeof(CapturingJob), "test:capturing", "0 0 * * *", "UTC"), + }); + services.AddSingleton(registry); + + var provider = services.BuildServiceProvider(); + var scopeFactory = provider.GetRequiredService(); + var logger = Substitute.For>(); + var runner = new ScheduledJobRunner(scopeFactory, logger); + + var jobCt = Substitute.For(); + jobCt.ShutdownToken.Returns(CancellationToken.None); + await runner.RunAsync("test:capturing", jobCt, performContext: null); + + Assert.True(capturing.Ran); + Assert.NotNull(capturing.CapturedContext); + Assert.Equal(ScheduledJobContext.SchedulerActor, capturing.CapturedContext!.ModBy); + } + + [Fact] + public async Task RunAsync_LogsAndSkipsWhenIdUnknown() + { + var services = new ServiceCollection(); + var registry = new ScheduledJobRegistry(new Dictionary()); + services.AddSingleton(registry); + var provider = services.BuildServiceProvider(); + var logger = Substitute.For>(); + var runner = new ScheduledJobRunner(provider.GetRequiredService(), logger); + + var jobCt = Substitute.For(); + jobCt.ShutdownToken.Returns(CancellationToken.None); + var ex = await Record.ExceptionAsync(() => runner.RunAsync("does-not-exist", jobCt, performContext: null)); + Assert.Null(ex); + } + } +} diff --git a/test/Viper.test.csproj b/test/Viper.test.csproj index e3866ed43..710835805 100644 --- a/test/Viper.test.csproj +++ b/test/Viper.test.csproj @@ -34,6 +34,7 @@ all + runtime; build; native; contentfiles; 
analyzers; buildtransitive all diff --git a/web/Areas/RAPS/Jobs/RapsRoleRefreshScheduledJob.cs b/web/Areas/RAPS/Jobs/RapsRoleRefreshScheduledJob.cs new file mode 100644 index 000000000..2b4347427 --- /dev/null +++ b/web/Areas/RAPS/Jobs/RapsRoleRefreshScheduledJob.cs @@ -0,0 +1,45 @@ +using Viper.Areas.RAPS.Services; +using Viper.Areas.Scheduler.Models; +using Viper.Areas.Scheduler.Services; +using Viper.Classes.SQLContext; +using Viper.Classes.Utilities; + +namespace Viper.Areas.RAPS.Jobs; + +/// +/// First consumer of the scheduled-job abstraction: nightly role membership +/// refresh. Wraps and threads the run's +/// through so the audit log clearly +/// distinguishes scheduler-driven changes ("__sched") from +/// admin-driven manual runs (a real LoginId). +/// +[ScheduledJob(id: "raps:role-refresh", cron: "0 0 * * *", TimeZoneId = "Pacific Standard Time")] +public sealed class RapsRoleRefreshScheduledJob : IScheduledJob +{ + private readonly RAPSContext _rapsContext; + private readonly ILogger _logger; + + public RapsRoleRefreshScheduledJob( + RAPSContext rapsContext, + ILogger logger) + { + _rapsContext = rapsContext; + _logger = logger; + } + + public async Task RunAsync(ScheduledJobContext context, CancellationToken ct) + { + context.WriteLine($"RAPS role refresh starting (modBy={context.ModBy})"); + var roleViews = new RoleViews(_rapsContext); + var messages = await roleViews.UpdateRoles(modBy: context.ModBy, debugOnly: false, ct: ct); + foreach (var message in messages) + { + context.WriteLine(message); + } + context.WriteLine($"Done. 
{messages.Count} change message(s)."); + _logger.LogInformation( + "RAPS role refresh (modBy={ModBy}) wrote {ChangeCount} change message(s)", + LogSanitizer.SanitizeString(context.ModBy), + messages.Count); + } +} diff --git a/web/Areas/RAPS/Services/RAPSAuditService.cs b/web/Areas/RAPS/Services/RAPSAuditService.cs index eda28d872..41153a2fc 100644 --- a/web/Areas/RAPS/Services/RAPSAuditService.cs +++ b/web/Areas/RAPS/Services/RAPSAuditService.cs @@ -253,12 +253,14 @@ public void AuditPermissionChange(TblPermission permission, AuditActionType acti /// /// The rolemember object /// Create Update or Delete - public void AuditRoleMemberChange(TblRoleMember roleMember, AuditActionType actionType, string? comment) + /// Optional free-text reason recorded on the audit row. + /// Audit actor; falls back to the current CAS user when null or blank. + public void AuditRoleMemberChange(TblRoleMember roleMember, AuditActionType actionType, string? comment, string? modBy = null) { TblLog tblLog = new() { ModTime = DateTime.Now, - ModBy = UserHelper.GetCurrentUser()?.LoginId, + ModBy = string.IsNullOrWhiteSpace(modBy) ? UserHelper.GetCurrentUser()?.LoginId : modBy, RoleId = roleMember.RoleId, MemberId = roleMember.MemberId, Comment = comment diff --git a/web/Areas/RAPS/Services/RoleViews.cs b/web/Areas/RAPS/Services/RoleViews.cs index 3ac80a46d..7340d1cdb 100644 --- a/web/Areas/RAPS/Services/RoleViews.cs +++ b/web/Areas/RAPS/Services/RoleViews.cs @@ -8,9 +8,11 @@ namespace Viper.Areas.RAPS.Services { public class RoleViews { + /// Default audit actor when no caller-supplied value is given (e.g. legacy callers). 
+ public const string DefaultModBy = "__system"; + private readonly RAPSContext _RAPSContext; private readonly RAPSAuditService _auditService; - private static readonly string _ModByName = "__system"; private static readonly string _AddComment = "Adding to role based on view {0}"; private static readonly string _DeleteComment = "Removing from role based on view {0}"; @@ -20,6 +22,9 @@ public RoleViews(RAPSContext RAPSContext) _auditService = new RAPSAuditService(RAPSContext); } + private static string ResolveActor(string? modBy) + => string.IsNullOrWhiteSpace(modBy) ? DefaultModBy : modBy; + public async Task> GetViewNames() { List allViews = await _RAPSContext.GetAllRapsViews.FromSql($"dbo.usp_getAllRapsViews") @@ -28,19 +33,27 @@ public async Task> GetViewNames() } /// - /// Update the membership of all roles defined by a view + /// Update the membership of all roles defined by a view. /// - /// - /// - public async Task> UpdateRoles(bool debugOnly = false) + /// + /// Audit actor stamped on every TblRoleMember and TblLog + /// row written by this run. Pass "__sched" for nightly + /// recurring runs, the LoginId for manual admin runs, or rely on the + /// for legacy callers. + /// + /// If true, only write messages, don't change the DB. + /// Honored between roles so Hangfire-driven runs can be cancelled cleanly during deploys or shutdown. + public async Task> UpdateRoles(string? 
modBy = null, bool debugOnly = false, CancellationToken ct = default) { + var actor = ResolveActor(modBy); List messages = new(); var roles = await _RAPSContext.TblRoles .Where(r => !string.IsNullOrEmpty(r.ViewName)) - .ToListAsync(); + .ToListAsync(ct); foreach (var role in roles) { - await UpdateRole(role, messages, debugOnly); + ct.ThrowIfCancellationRequested(); + await UpdateRole(role, messages, debugOnly, actor); } return messages; } @@ -51,7 +64,8 @@ public async Task> UpdateRoles(bool debugOnly = false) /// The role /// If running as a routine for multiple roles, the messages will be appended to this list. /// If true, only write messages, don't change the DB - public async Task> UpdateRole(TblRole role, List? messages = null, bool debugOnly = false) + /// Audit actor; defaults to . + public async Task> UpdateRole(TblRole role, List? messages = null, bool debugOnly = false, string? modBy = null) { if (string.IsNullOrEmpty(role.ViewName)) { @@ -101,21 +115,22 @@ public async Task> UpdateRole(TblRole role, List? 
messages messages.Add(string.Format("View {0} has 0 members", role.ViewName)); } + var actor = ResolveActor(modBy); if (!debugOnly) { foreach (string toAddMemberId in toAdd) { - AddRoleMember(role.RoleId, toAddMemberId, role.ViewName); + AddRoleMember(role.RoleId, toAddMemberId, role.ViewName, actor); } foreach (TblRoleMember toDeleteMember in toDelete) { - DeleteRoleMember(toDeleteMember, role.ViewName); + DeleteRoleMember(toDeleteMember, role.ViewName, actor); } } return messages; } - private void AddRoleMember(int roleId, string memberId, string viewName) + private void AddRoleMember(int roleId, string memberId, string viewName, string modBy) { using var transaction = _RAPSContext.Database.BeginTransaction(); TblRoleMember tblRoleMember = new() @@ -124,19 +139,19 @@ private void AddRoleMember(int roleId, string memberId, string viewName) MemberId = memberId, ViewName = viewName, ModTime = DateTime.Now, - ModBy = _ModByName + ModBy = modBy }; _RAPSContext.TblRoleMembers.Add(tblRoleMember); _RAPSContext.SaveChanges(); - _auditService.AuditRoleMemberChange(tblRoleMember, RAPSAuditService.AuditActionType.Create, string.Format(_AddComment, viewName)); + _auditService.AuditRoleMemberChange(tblRoleMember, RAPSAuditService.AuditActionType.Create, string.Format(_AddComment, viewName), modBy); _RAPSContext.SaveChanges(); transaction.Commit(); } - private void DeleteRoleMember(TblRoleMember deleteMember, string viewName) + private void DeleteRoleMember(TblRoleMember deleteMember, string viewName, string modBy) { _RAPSContext.TblRoleMembers.Remove(deleteMember); - _auditService.AuditRoleMemberChange(deleteMember, RAPSAuditService.AuditActionType.Delete, string.Format(_DeleteComment, viewName)); + _auditService.AuditRoleMemberChange(deleteMember, RAPSAuditService.AuditActionType.Delete, string.Format(_DeleteComment, viewName), modBy); _RAPSContext.SaveChanges(); } diff --git a/web/Areas/Scheduler/Controllers/.gitkeep b/web/Areas/Scheduler/Controllers/.gitkeep new file mode 
100644 index 000000000..e69de29bb diff --git a/web/Areas/Scheduler/Models/ScheduledJobContext.cs b/web/Areas/Scheduler/Models/ScheduledJobContext.cs new file mode 100644 index 000000000..30610e8df --- /dev/null +++ b/web/Areas/Scheduler/Models/ScheduledJobContext.cs @@ -0,0 +1,49 @@ +using Hangfire.Console; +using Hangfire.Server; + +namespace Viper.Areas.Scheduler.Models; + +/// +/// Per-execution context handed to every +/// implementation. Background jobs have no HTTP context, so the framework +/// resolves the effective here and the job stamps audit +/// rows with that value rather than reaching for UserHelper.GetCurrentUser(). +/// +public sealed class ScheduledJobContext +{ + /// + /// Audit actor stamped on rows produced by scheduler-triggered runs. + /// 7 chars to fit the legacy tblRoleMembers.ModBy varchar(8) + /// column while staying distinct from the existing "__system" + /// convention. + /// + public const string SchedulerActor = "__sched"; + + private readonly PerformContext? _performContext; + + public ScheduledJobContext(string modBy, PerformContext? performContext = null) + { + if (string.IsNullOrWhiteSpace(modBy)) + { + throw new ArgumentException("modBy is required.", nameof(modBy)); + } + ModBy = modBy; + _performContext = performContext; + } + + /// + /// Audit actor. Always today; if a manual + /// trigger path is added later it can pass a real LoginId here. + /// + public string ModBy { get; } + + /// + /// Writes a line to the job's Hangfire dashboard console pane. No-op when + /// the job runs outside Hangfire (unit tests, manual invocation), so jobs + /// can call this unconditionally without nullable-context noise. 
+ /// + public void WriteLine(string message) + { + _performContext?.WriteLine(message); + } +} diff --git a/web/Areas/Scheduler/README.md b/web/Areas/Scheduler/README.md new file mode 100644 index 000000000..70b91c4e0 --- /dev/null +++ b/web/Areas/Scheduler/README.md @@ -0,0 +1,175 @@ +# Scheduler + +Cron-driven background jobs for VIPER. Built on Hangfire 1.8 with SQL Server +storage; jobs are written against a thin `IScheduledJob` abstraction so they +do not depend on Hangfire types directly. + +This document is the operational source of truth for the scheduler: +how to add a job, how it is configured, and how to triage incidents. + +--- + +## Onboarding a job + +Every recurring job is a class that implements `IScheduledJob` and carries a +`[ScheduledJob]` attribute. Discovery happens at startup; there is no +manifest file to update. + +### 1. Declare the job + +Place the file under your area's `Jobs/` folder. Example, the RAPS +role-membership refresh: + +```csharp +// web/Areas/RAPS/Jobs/RapsRoleRefreshScheduledJob.cs +[ScheduledJob(id: "raps:role-refresh", cron: "0 0 * * *", TimeZoneId = "Pacific Standard Time")] +public sealed class RapsRoleRefreshScheduledJob : IScheduledJob +{ + private readonly RAPSContext _rapsContext; + private readonly ILogger _logger; + + public RapsRoleRefreshScheduledJob( + RAPSContext rapsContext, + ILogger logger) + { + _rapsContext = rapsContext; + _logger = logger; + } + + public async Task RunAsync(ScheduledJobContext context, CancellationToken ct) + { + var roleViews = new RoleViews(_rapsContext); + await roleViews.UpdateRoles(modBy: context.ModBy, debugOnly: false, ct: ct); + } +} +``` + +### 2. Naming rules + +| Field | Rule | +|---|---| +| `id` | `area:job-name` (e.g. `raps:role-refresh`). | +| `cron` | Five-field Hangfire cron (`m h dom mon dow`). | +| `TimeZoneId` | Defaults to `Pacific Standard Time`. UC Davis runs Windows; IANA aliases like `America/Los_Angeles` also work. | + +### 3. 
Stamping audit rows + +Background jobs run with no HTTP context, so `UserHelper.GetCurrentUser()` +is **not available**. Every job receives a `ScheduledJobContext` whose +`ModBy` property is the audit actor for this run — pass it through to +your service layer; do not derive it inside the job. + +`ModBy` is `"__sched"` (7 chars; the legacy `tblRoleMembers.ModBy` +column is `varchar(8)`, so the stamp is shortened to fit while staying +distinct from the existing `"__system"` convention). Existing audit +queries can filter on `WHERE ModBy = '__sched'` to isolate +scheduler-driven changes from human-driven changes. + +### 4. DI + +Job dependencies are resolved from a fresh DI scope per execution. Any +`Scoped` service (DbContexts, scoped services from Scrutor) works without +extra wiring — the discovery pass registers your job type as +`Scoped` for you. + +### 5. What runs where + +| Surface | Mechanism | +|---|---| +| Initial registration | At app startup, after Hangfire is mounted, every `[ScheduledJob]`-declared type is `AddOrUpdate`'d. Idempotent. | +| Subsequent registrations | A fresh deploy with new jobs picks them up on next startup. | + +--- + +## Configuration + +All settings live in `appsettings.{Environment}.json` (or AWS SSM +parameters in deployed environments). + +| Key | Purpose | Default | +|---|---|---| +| `Hangfire:Enabled` | Master switch. When `false`, no scheduler wiring runs and the dashboard is unreachable. | `true` | +| `Hangfire:AutoSchedule` | When `false`, recurring jobs register with `Cron.Never` so cron never fires. The worker still runs and the dashboard still mounts, so operators can fire jobs via "Trigger now" or `BackgroundJob.Enqueue`. Local dev sets this `false` to require manual triggering. | `true` | +| `ConnectionStrings:VIPER` | The database that hosts Hangfire's tables. Required when `Hangfire:Enabled=true`. 
| n/a | +| `IPAddressAllowlistConfiguration:InternalAllowlist` | Source-IP gate for `/health/detail` and the HealthChecks UI. Add SVM infra ranges + your office subnet. | localhost only | + +The dashboard is **not** subject to the `InternalAllowlist` IP gate; it is always mounted at +`/scheduler/dashboard` when Hangfire is enabled and is gated by RAPS, +not IP. + +--- + +## Access + +| Surface | URL | Auth | +|---|---|---| +| Hangfire dashboard | `/scheduler/dashboard` | Cookie auth (CAS) + RAPS permission `SVMSecure.CATS.scheduledJobs` | +| Health (liveness) | `/health` | Anonymous (Jenkins polls it) | +| Health (detail) | `/health/detail` | IP-allowlisted to `InternalAllowlist` | + +`SVMSecure.CATS.scheduledJobs` is the same permission the legacy +ColdFusion VIPER scheduler (`cats/inc_scheduledTasks.cfm`) checks — +admins who already manage the legacy scheduler inherit access without a +provisioning step. + +### Dashboard add-ons + +Two Hangfire dashboard plugins enrich the operator experience: + +- **Hangfire.Console** — per-job console output appears inline on the + job's detail page. Jobs call `context.WriteLine(...)` on the + `ScheduledJobContext` they receive; the output is captured in storage + and rendered in the dashboard. +- **Hangfire.Heartbeat** — CPU, memory, and uptime metrics for each + registered worker are shown on the dashboard's Servers page (refreshed + every 30 s). Useful for spotting a stuck worker before users do. + +--- + +## Operations runbook + +### Heartbeat verification + +| Symptom | Where to look | +|---|---| +| "Is the scheduler alive?" | `/health/detail` — the `hangfire` check reports `Healthy` (one or more servers with recent heartbeats), `Degraded` (storage reachable but no servers), or `Unhealthy` (storage error or all heartbeats > 2 minutes stale). | +| "Did this job run?" | Dashboard → Recurring Jobs → row for the id; columns show last/next execution and last state. | +| "Are workers processing?"
| Dashboard → Servers panel; heartbeats refresh every 30 seconds. | + +### Retrying a failed job + +1. Open `/scheduler/dashboard`. +2. Failed jobs appear in the **Failed** queue. +3. Click the job → **Requeue** to retry once, or **Delete** to discard. +4. Recurring jobs that fail still trigger on their next cron schedule + regardless — requeue is for retrying the specific failed + instance. + +### Pre-escalation checklist + +Before paging a developer, verify in this order: + +1. **Connection string** — `ConnectionStrings:VIPER` resolves and + the SQL login has read/write on the `HangFire` schema. +2. **Permission grant** — the user holds + `SVMSecure.CATS.scheduledJobs` (check RAPS). +3. **Server heartbeat** — `/health/detail` returns `hangfire` + `Healthy`. If `Degraded` (no servers), the worker process is down or + not started; confirm `Hangfire:Enabled=true` and check application + startup logs for `"Hangfire is enabled but ConnectionStrings:VIPER is + empty"`. +4. **Recent deploys** — a job that disappeared after a deploy is + re-registered on the next app startup (idempotent `AddOrUpdate`). + Restart the app to force it. 
+ +--- + +## Related code + +| Concern | Location | +|---|---| +| Hangfire wiring (DI, dashboard mount) | `web/Classes/Scheduler/HangfireExtensions.cs` | +| Dashboard auth filter | `web/Classes/Scheduler/HangfireDashboardAuthorizationFilter.cs` | +| Per-job logging filter | `web/Classes/Scheduler/HangfireJobLoggingFilter.cs` | +| Health check | `web/Classes/HealthChecks/HangfireHealthCheck.cs` | +| Job abstraction | `web/Areas/Scheduler/Services/IScheduledJob.cs`, `ScheduledJobAttribute.cs`, `ScheduledJobDiscovery.cs`, `ScheduledJobRunner.cs` | diff --git a/web/Areas/Scheduler/Services/.gitkeep b/web/Areas/Scheduler/Services/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/web/Areas/Scheduler/Services/IScheduledJob.cs b/web/Areas/Scheduler/Services/IScheduledJob.cs new file mode 100644 index 000000000..9690e3852 --- /dev/null +++ b/web/Areas/Scheduler/Services/IScheduledJob.cs @@ -0,0 +1,18 @@ +using Viper.Areas.Scheduler.Models; + +namespace Viper.Areas.Scheduler.Services; + +/// +/// Contract every cron-driven background job in VIPER implements. The +/// scheduler discovers IScheduledJob registrations at startup and +/// wires each one into Hangfire based on its ScheduledJobAttribute. +/// Implementations should not call UserHelper.GetCurrentUser(); the +/// effective audit actor is supplied via the per-run context. +/// +public interface IScheduledJob +{ + /// Executes one run of the scheduled job. + /// Per-run context carrying the audit actor and dashboard console writer. + /// Cancellation token honored by long-running jobs. + Task RunAsync(ScheduledJobContext context, CancellationToken ct); +} diff --git a/web/Areas/Scheduler/Services/ScheduledJobAttribute.cs b/web/Areas/Scheduler/Services/ScheduledJobAttribute.cs new file mode 100644 index 000000000..fb2baff13 --- /dev/null +++ b/web/Areas/Scheduler/Services/ScheduledJobAttribute.cs @@ -0,0 +1,28 @@ +namespace Viper.Areas.Scheduler.Services; + +/// +/// Declares the cron schedule and recurring-job id of an IScheduledJob.
+/// The discovery pass at startup reads this attribute and registers the job +/// with Hangfire. +/// +[AttributeUsage(AttributeTargets.Class, AllowMultiple = false, Inherited = false)] +public sealed class ScheduledJobAttribute : Attribute +{ + public ScheduledJobAttribute(string id, string cron) + { + Id = id; + Cron = cron; + } + + /// Recurring-job id. + public string Id { get; } + + /// Hangfire cron expression (5 or 6 field). + public string Cron { get; } + + /// + /// IANA or Windows time zone id. Defaults to Pacific (UC Davis); set + /// explicitly when a job needs a different reference time zone. + /// + public string TimeZoneId { get; set; } = "Pacific Standard Time"; +} diff --git a/web/Areas/Scheduler/Services/ScheduledJobDiscovery.cs b/web/Areas/Scheduler/Services/ScheduledJobDiscovery.cs new file mode 100644 index 000000000..8cedf1629 --- /dev/null +++ b/web/Areas/Scheduler/Services/ScheduledJobDiscovery.cs @@ -0,0 +1,118 @@ +using System.Reflection; +using Hangfire; + +namespace Viper.Areas.Scheduler.Services; + +/// +/// Discovers IScheduledJob implementations annotated with +/// ScheduledJobAttribute and produces a registry plus DI +/// registrations for each. +/// +public static class ScheduledJobDiscovery +{ + /// + /// Scans the supplied assemblies for concrete classes that implement + /// IScheduledJob and carry a ScheduledJobAttribute, + /// registers each job type and the runner with DI, and returns the + /// resulting metadata. + /// + /// + /// Thrown when two jobs declare the same id.
+ /// + public static IReadOnlyList RegisterScheduledJobs( + IServiceCollection services, + IEnumerable assemblies) + { + var found = new Dictionary(StringComparer.Ordinal); + + foreach (var asm in assemblies) + { + foreach (var type in asm.GetTypes()) + { + if (type.IsAbstract || type.IsInterface) + { + continue; + } + if (!typeof(IScheduledJob).IsAssignableFrom(type)) + { + continue; + } + + var attr = type.GetCustomAttribute(); + if (attr == null) + { + throw new InvalidOperationException( + $"{type.FullName} implements IScheduledJob but is missing [ScheduledJob]; either add the attribute or unregister the type."); + } + + if (found.TryGetValue(attr.Id, out var existing)) + { + throw new InvalidOperationException( + $"Duplicate scheduled-job id '{attr.Id}' on {type.FullName} (already declared by {existing.JobType.FullName})."); + } + + services.AddScoped(type); + found[attr.Id] = new ScheduledJobMetadata( + type, + attr.Id, + attr.Cron, + attr.TimeZoneId); + } + } + + var snapshot = new System.Collections.ObjectModel.ReadOnlyDictionary(found); + services.AddSingleton(new ScheduledJobRegistry(snapshot)); + services.AddTransient(); + + return [.. found.Values]; + } + + /// + /// Calls for each declared + /// job. Idempotent: safe to invoke on every startup. When + /// is false, every job is registered with + /// so it is visible in the dashboard but never + /// fires on a schedule (operators can still use "Trigger now" or + /// BackgroundJob.Enqueue). + /// + public static void RegisterRecurringJobs( + IRecurringJobManager manager, + IEnumerable jobs, + bool autoSchedule = true) + { + foreach (var meta in jobs) + { + var cron = autoSchedule ? meta.Cron : Cron.Never(); + // PerformContext is null in the expression; Hangfire substitutes + // the real context at execution time (same pattern as IJobCancellationToken). 
+ manager.AddOrUpdate( + meta.Id, + runner => runner.RunAsync(meta.Id, JobCancellationToken.Null, null!), + cron, + new RecurringJobOptions + { + TimeZone = ResolveTimeZone(meta.TimeZoneId), + }); + } + } + + private static readonly NLog.Logger _logger = NLog.LogManager.GetCurrentClassLogger(); + + private static TimeZoneInfo ResolveTimeZone(string id) + { + try + { + return TimeZoneInfo.FindSystemTimeZoneById(id); + } + catch (TimeZoneNotFoundException ex) + { + _logger.Warn(ex, "Scheduled job timezone '{0}' not found; falling back to UTC.", id); + return TimeZoneInfo.Utc; + } + catch (InvalidTimeZoneException ex) + { + _logger.Warn(ex, "Scheduled job timezone '{0}' is invalid; falling back to UTC.", id); + return TimeZoneInfo.Utc; + } + } +} diff --git a/web/Areas/Scheduler/Services/ScheduledJobRegistry.cs b/web/Areas/Scheduler/Services/ScheduledJobRegistry.cs new file mode 100644 index 000000000..c223d18e3 --- /dev/null +++ b/web/Areas/Scheduler/Services/ScheduledJobRegistry.cs @@ -0,0 +1,45 @@ +namespace Viper.Areas.Scheduler.Services; + +/// +/// One declaration of a scheduled job, materialized from the +/// on its implementing type. Used by the +/// startup registrar to wire Hangfire. +/// +public sealed class ScheduledJobMetadata +{ + public ScheduledJobMetadata( + Type jobType, + string id, + string cron, + string timeZoneId) + { + JobType = jobType; + Id = id; + Cron = cron; + TimeZoneId = timeZoneId; + } + + public Type JobType { get; } + public string Id { get; } + public string Cron { get; } + public string TimeZoneId { get; } +} + +/// +/// Frozen view of every declared in the running +/// process, indexed by recurring-job id. Populated once at startup. 
+/// +public interface IScheduledJobRegistry +{ + IReadOnlyDictionary JobsById { get; } +} + +public sealed class ScheduledJobRegistry : IScheduledJobRegistry +{ + public ScheduledJobRegistry(IReadOnlyDictionary jobsById) + { + JobsById = jobsById; + } + + public IReadOnlyDictionary JobsById { get; } +} diff --git a/web/Areas/Scheduler/Services/ScheduledJobRunner.cs b/web/Areas/Scheduler/Services/ScheduledJobRunner.cs new file mode 100644 index 000000000..a7f802f5c --- /dev/null +++ b/web/Areas/Scheduler/Services/ScheduledJobRunner.cs @@ -0,0 +1,60 @@ +using Hangfire; +using Hangfire.Server; +using Viper.Areas.Scheduler.Models; +using Viper.Classes.Utilities; + +namespace Viper.Areas.Scheduler.Services; + +/// +/// Hangfire-side dispatcher. Hangfire cannot serialize a method call against +/// an interface type, so every recurring registration targets this concrete +/// class. The runner resolves the actual IScheduledJob from DI +/// by id at execution time and hands it a ScheduledJobContext +/// stamped with the scheduler actor. +/// +public sealed class ScheduledJobRunner +{ + private readonly IServiceScopeFactory _scopeFactory; + private readonly ILogger _logger; + + public ScheduledJobRunner( + IServiceScopeFactory scopeFactory, + ILogger logger) + { + _scopeFactory = scopeFactory; + _logger = logger; + } + + /// + /// Hangfire invokes this method with the scheduled job's id. The + /// cancellationToken parameter is replaced by Hangfire + /// at runtime with one tied to the server's shutdown signal so jobs can + /// honor cooperative cancellation. The performContext is + /// also injected by Hangfire and threaded into ScheduledJobContext + /// so jobs can write to the dashboard console via context.WriteLine. + /// A fresh DI scope is created per execution so each run gets its own + /// DbContext. + /// + public async Task RunAsync( + string jobId, + IJobCancellationToken cancellationToken, + PerformContext?
performContext) + { + using var scope = _scopeFactory.CreateScope(); + var registry = scope.ServiceProvider.GetRequiredService(); + if (!registry.JobsById.TryGetValue(jobId, out var metadata)) + { + _logger.LogWarning( + "ScheduledJobRunner invoked with unknown id {JobId}; no IScheduledJob registered. Skipping.", + LogSanitizer.SanitizeString(jobId)); + return; + } + + var job = (IScheduledJob)scope.ServiceProvider.GetRequiredService(metadata.JobType); + var context = new ScheduledJobContext( + ScheduledJobContext.SchedulerActor, + performContext); + + await job.RunAsync(context, cancellationToken.ShutdownToken); + } +} diff --git a/web/Classes/ForwardedHeadersExtensions.cs b/web/Classes/ForwardedHeadersExtensions.cs new file mode 100644 index 000000000..db52dd5e0 --- /dev/null +++ b/web/Classes/ForwardedHeadersExtensions.cs @@ -0,0 +1,73 @@ +using Microsoft.AspNetCore.HttpOverrides; +using NLog; +using System.Net; +using Viper.Classes.Utilities; + +namespace Viper.Classes +{ + /// + /// Wires for the test/prod proxy + /// chain (User -> Cloudflare -> F5 -> app). Kept out of Program.cs so + /// Main stays small. + /// + public static class ForwardedHeadersExtensions + { + // The F5's internal IP. Static so it's only parsed once per process. + private static readonly IPAddress F5InternalIp = IPAddress.Parse("192.168.56.134"); + + /// + /// Registers ForwardedHeadersOptions with the F5 + Cloudflare CIDRs as + /// trusted proxies. No-op in Development (UseForwardedHeaders isn't + /// applied there either, and we want to avoid the cloudflare.com fetch + /// on every local startup). 
+ /// + public static IServiceCollection AddViperForwardedHeaders( + this IServiceCollection services, + IHostEnvironment environment, + Logger logger) + { + if (environment.IsDevelopment()) + { + return services; + } + + var cloudflareCidrs = CloudflareNetworks.FetchOrFallback(logger); + services.Configure(options => + { + options.ForwardedHeaders = + ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto; + options.KnownProxies.Add(F5InternalIp); + + // Cloudflare fronts vetmed.ucdavis.edu. The chain is + // User -> Cloudflare -> F5 -> app, so the middleware must walk two + // proxy hops to land on the real client IP. Default ForwardLimit + // is 1, which stops at the CF edge - bump to 2. + options.ForwardLimit = 2; + AddCloudflareCidrs(options, cloudflareCidrs, logger); + }); + + return services; + } + + // cidrs come from cloudflare.com (or our hardcoded fallback). A single + // malformed entry in the live response shouldn't crash startup - skip + // it and keep the rest of the allowlist. + private static void AddCloudflareCidrs( + ForwardedHeadersOptions options, + IEnumerable cidrs, + Logger logger) + { + foreach (var cidr in cidrs) + { + try + { + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse(cidr)); + } + catch (FormatException ex) + { + logger.Warn(ex, "Skipping invalid Cloudflare CIDR: {Cidr}", LogSanitizer.SanitizeString(cidr)); + } + } + } + } +} diff --git a/web/Classes/HealthChecks/HangfireHealthCheck.cs b/web/Classes/HealthChecks/HangfireHealthCheck.cs new file mode 100644 index 000000000..900bdc873 --- /dev/null +++ b/web/Classes/HealthChecks/HangfireHealthCheck.cs @@ -0,0 +1,93 @@ +using Hangfire; +using Hangfire.Storage; +using Hangfire.Storage.Monitoring; +using Microsoft.Extensions.Diagnostics.HealthChecks; + +namespace Viper.Classes.HealthChecks +{ + /// + /// Reports the state of the Hangfire job system: storage reachability, + /// registered server count, and heartbeat freshness. 
Only registered when + /// Hangfire is enabled, so JobStorage is guaranteed to be in DI. + /// + public class HangfireHealthCheck : IHealthCheck + { + private static readonly NLog.Logger _logger = NLog.LogManager.GetCurrentClassLogger(); + + // Hangfire's default heartbeat interval is 30s; 2 minutes covers a few + // missed beats before we call a server stale. + private static readonly TimeSpan StaleHeartbeatThreshold = TimeSpan.FromMinutes(2); + + private readonly JobStorage _storage; + + public HangfireHealthCheck(JobStorage storage) + { + _storage = storage; + } + + /// + /// Probes Hangfire storage and server heartbeats, reporting Healthy, + /// Degraded (no servers), or Unhealthy (storage error or stale heartbeats). + /// + public Task CheckHealthAsync( + HealthCheckContext context, + CancellationToken cancellationToken = default) + { + IMonitoringApi monitoringApi; + StatisticsDto stats; + IList servers; + try + { + monitoringApi = _storage.GetMonitoringApi(); + stats = monitoringApi.GetStatistics(); + servers = monitoringApi.Servers(); + } + // Hangfire wraps SqlException/TimeoutException/InvalidOperationException + // and more inside its storage layer; letting any escape would crash the + // health-response writer, so a broad catch is the right call here. 
+ catch (Exception ex) + { + _logger.Error(ex, "Hangfire health check failed to query storage"); + return Task.FromResult(HealthCheckResult.Unhealthy( + "Hangfire storage unreachable.", exception: ex)); + } + + var data = new Dictionary + { + ["servers"] = stats.Servers, + ["enqueued"] = stats.Enqueued, + ["scheduled"] = stats.Scheduled, + ["processing"] = stats.Processing, + ["failed"] = stats.Failed, + ["recurring"] = stats.Recurring, + }; + + if (servers.Count == 0) + { + return Task.FromResult(HealthCheckResult.Degraded( + "Hangfire storage reachable but no servers registered.", data: data)); + } + + var now = DateTime.UtcNow; + // Treat a null Heartbeat as maximally stale so a server that never + // reported in still trips the stale-heartbeat path. + var ages = servers + .Select(s => s.Heartbeat.HasValue ? now - s.Heartbeat.Value : TimeSpan.MaxValue) + .ToList(); + var oldestAge = ages.Max(); + + if (ages.All(age => age > StaleHeartbeatThreshold)) + { + var formatted = oldestAge == TimeSpan.MaxValue + ? "never" + : oldestAge.ToString("hh\\:mm\\:ss"); + return Task.FromResult(HealthCheckResult.Unhealthy( + $"Hangfire servers registered but heartbeats are stale (oldest: {formatted}).", + data: data)); + } + + return Task.FromResult(HealthCheckResult.Healthy( + $"Hangfire OK: {servers.Count} server(s).", data: data)); + } + } +} diff --git a/web/Classes/Scheduler/HangfireDashboardAuthorizationFilter.cs b/web/Classes/Scheduler/HangfireDashboardAuthorizationFilter.cs new file mode 100644 index 000000000..dc59e9128 --- /dev/null +++ b/web/Classes/Scheduler/HangfireDashboardAuthorizationFilter.cs @@ -0,0 +1,52 @@ +using Hangfire.Dashboard; +using Viper.Classes.SQLContext; + +namespace Viper.Classes.Scheduler +{ + /// + /// Gate the Hangfire dashboard on the same RAPS permission used by the + /// scheduler API (SVMSecure.CATS.scheduledJobs). 
+ /// Unauthenticated users are handled upstream by + /// RequireAuthorization() on the mapped endpoint, which triggers + /// the cookie auth challenge and redirects to /login (and on to + /// CAS); this filter therefore only sees authenticated principals and + /// decides authorize-or-403 based on permission membership. When the + /// filter returns false for an authenticated user, Hangfire's middleware + /// writes a 403. + /// + public sealed class HangfireDashboardAuthorizationFilter : IDashboardAuthorizationFilter + { + public const string SchedulerPermission = "SVMSecure.CATS.scheduledJobs"; + + public bool Authorize(DashboardContext context) + { + var httpContext = context.GetHttpContext(); + var user = httpContext.User; + + if (user.Identity == null || !user.Identity.IsAuthenticated) + { + return false; + } + + var services = httpContext.RequestServices; + var userHelper = services.GetService(); + var rapsContext = services.GetService(); + var aaudContext = services.GetService(); + + if (userHelper == null || rapsContext == null || aaudContext == null) + { + return false; + } + + var loginId = user.Identity.Name; + if (string.IsNullOrEmpty(loginId)) + { + return false; + } + + var aaudUser = userHelper.GetByLoginId(aaudContext, loginId); + return aaudUser != null + && userHelper.HasPermission(rapsContext, aaudUser, SchedulerPermission); + } + } +} diff --git a/web/Classes/Scheduler/HangfireExtensions.cs b/web/Classes/Scheduler/HangfireExtensions.cs new file mode 100644 index 000000000..f76454dd3 --- /dev/null +++ b/web/Classes/Scheduler/HangfireExtensions.cs @@ -0,0 +1,132 @@ +using System.Reflection; +using Hangfire; +using Hangfire.Console; +using Hangfire.Heartbeat; +using Hangfire.Heartbeat.Server; +using Hangfire.Server; +using NLog; +using Viper.Areas.Scheduler.Services; +using Viper.Classes.HealthChecks; + +namespace Viper.Classes.Scheduler +{ + /// + /// DI + pipeline wiring for Hangfire. 
Gated by Hangfire:Enabled; + /// when the flag is false the rest of the web app continues to start + /// normally. + /// + public static class HangfireExtensions + { + public const string DashboardPath = "/scheduler/dashboard"; + private const string EnabledKey = "Hangfire:Enabled"; + private const string AutoScheduleKey = "Hangfire:AutoSchedule"; + private const string DashboardAppPathKey = "Hangfire:DashboardAppPath"; + + /// + /// Registers Hangfire services + the background server when + /// Hangfire:Enabled is true. Hangfire's tables live in the + /// VIPER database under the HangFire schema (auto-migrated on first + /// server start). When Hangfire:AutoSchedule is false (e.g. + /// local dev), the worker still runs and the dashboard still mounts, + /// but recurring jobs are registered with Cron.Never so cron + /// never fires; jobs are visible in the dashboard and can be invoked + /// via "Trigger now" or BackgroundJob.Enqueue. + /// + public static IServiceCollection AddViperHangfire( + this IServiceCollection services, + IConfiguration configuration, + Logger logger) + { + if (!configuration.GetValue(EnabledKey)) + { + return services; + } + + var connectionString = configuration.GetConnectionString("VIPER"); + if (string.IsNullOrWhiteSpace(connectionString)) + { + logger.Error( + "Hangfire is enabled but ConnectionStrings:VIPER is empty. " + + "Hangfire will be disabled for this process."); + return services; + } + + // Filter is a singleton so a single ILogger instance and any future + // shared state (counters, etc.) stay process-wide. + services.AddSingleton(); + services.AddHangfire((sp, config) => config + .UseSqlServerStorage(connectionString, new Hangfire.SqlServer.SqlServerStorageOptions + { + SchemaName = "HangFire", + }) + .UseFilter(sp.GetRequiredService()) + // Hangfire.Console: per-job execution logs in the dashboard + // (jobs accept PerformContext and call context.WriteLine). 
+ .UseConsole() + // Hangfire.Heartbeat: dashboard tab that renders server + // metrics. The recorder is added below as a background process + // on the worker; both must run for the graph to populate. + .UseHeartbeatPage(checkInterval: TimeSpan.FromSeconds(30))); + + // ProcessMonitor (from Hangfire.Heartbeat) records the CPU/RAM/ + // uptime metrics rendered by UseHeartbeatPage. Registering it as + // an IBackgroundProcess in DI lets AddHangfireServer pick it up + // alongside the default workers without us having to pass storage + // explicitly (the overload that takes additionalProcesses also + // requires a non-null JobStorage). + services.AddSingleton(_ => new ProcessMonitor(checkInterval: TimeSpan.FromSeconds(30))); + services.AddHangfireServer(); + + // Hangfire-specific check piggybacks on the /health/detail "ready" + // surface stood up in PR 0. Only registered when Hangfire itself is + // wired so /health/detail doesn't claim a missing subsystem is down. + services.AddHealthChecks() + .AddCheck("hangfire", tags: new[] { "ready" }); + + ScheduledJobDiscovery.RegisterScheduledJobs( + services, + new[] { Assembly.GetExecutingAssembly() }); + + return services; + } + + /// + /// Mounts the Hangfire dashboard at when + /// Hangfire is actually registered (i.e. + /// ran successfully). Unauthenticated visitors hit + /// RequireAuthorization() first and are redirected to CAS via + /// the cookie auth challenge; authenticated visitors are gated by + /// , which checks + /// the SVMSecure.CATS.scheduledJobs RAPS permission. Call AFTER + /// UseRouting / UseAuthentication / UseAuthorization. + /// + public static WebApplication UseViperHangfire(this WebApplication app) + { + if (app.Services.GetService() == null) + { + return app; + } + + // Hangfire renders the configured AppPath as-is in the dashboard + // "Back to site" link. With no UsePathBase middleware, we need to + // supply the /2 deployment prefix from configuration ourselves. 
+ var dashboardAppPath = app.Configuration.GetValue(DashboardAppPathKey) ?? "/Computing"; + app.MapHangfireDashboard(DashboardPath, new DashboardOptions + { + Authorization = new[] { new HangfireDashboardAuthorizationFilter() }, + DashboardTitle = "VIPER Scheduler", + AppPath = dashboardAppPath, + }).RequireAuthorization(); + + // When AutoSchedule is off (dev), register every recurring job + // with Cron.Never so the dashboard still shows them and operators + // can fire them via "Trigger now" or BackgroundJob.Enqueue, but + // nothing fires on its own. + var autoSchedule = app.Configuration.GetValue(AutoScheduleKey) ?? true; + var recurringJobManager = app.Services.GetRequiredService(); + var registry = app.Services.GetRequiredService(); + ScheduledJobDiscovery.RegisterRecurringJobs(recurringJobManager, registry.JobsById.Values, autoSchedule); + return app; + } + } +} diff --git a/web/Classes/Scheduler/HangfireJobLoggingFilter.cs b/web/Classes/Scheduler/HangfireJobLoggingFilter.cs new file mode 100644 index 000000000..d98189ca6 --- /dev/null +++ b/web/Classes/Scheduler/HangfireJobLoggingFilter.cs @@ -0,0 +1,78 @@ +using Hangfire.Server; +using Viper.Classes.Utilities; + +namespace Viper.Classes.Scheduler +{ + /// + /// Hangfire server filter that wraps every job execution in a structured + /// logging scope (jobId / recurringJobId) and emits start/complete/error + /// log entries. All user-influenced values are run through + /// before being logged. 
+ /// + public sealed class HangfireJobLoggingFilter : IServerFilter + { + private const string ScopeKey = "__HangfireLoggingScope"; + + private readonly ILogger _logger; + + public HangfireJobLoggingFilter(ILogger logger) + { + _logger = logger; + } + + public void OnPerforming(PerformingContext context) + { + var jobId = LogSanitizer.SanitizeId(context.BackgroundJob.Id); + var recurringJobId = LogSanitizer.SanitizeString(context.GetJobParameter("RecurringJobId")); + + // Stash the scope on context.Items because OnPerforming and OnPerformed + // are separate calls; a `using` block here would dispose too early. + var scope = _logger.BeginScope(new Dictionary + { + ["jobId"] = jobId ?? string.Empty, + ["recurringJobId"] = recurringJobId ?? string.Empty + }); + + if (scope != null) + { + context.Items[ScopeKey] = scope; + } + + // Log only argument metadata (count). Logging values risks leaking + // secrets/PII even after control-char sanitization, so the value + // strings are intentionally not emitted. + var argCount = context.BackgroundJob.Job.Args?.Count ?? 0; + + _logger.LogInformation( + "Hangfire job starting: {JobType}.{JobMethod} (argCount={ArgCount})", + LogSanitizer.SanitizeString(context.BackgroundJob.Job.Type.FullName), + LogSanitizer.SanitizeString(context.BackgroundJob.Job.Method.Name), + argCount); + } + + public void OnPerformed(PerformedContext context) + { + // Dispose the scope stashed by OnPerforming via `using` so the + // structured logging fields stay attached for the log lines below + // and are released at method exit. + using var scope = context.Items.TryGetValue(ScopeKey, out var stashed) + ? 
stashed as IDisposable + : null; + try + { + if (context.Exception != null) + { + _logger.LogError(context.Exception, "Hangfire job threw exception"); + } + else + { + _logger.LogInformation("Hangfire job completed"); + } + } + finally + { + context.Items.Remove(ScopeKey); + } + } + } +} diff --git a/web/Program.cs b/web/Program.cs index 5fee3318b..8baab4fa2 100644 --- a/web/Program.cs +++ b/web/Program.cs @@ -6,7 +6,6 @@ using Microsoft.AspNetCore.Authentication.Cookies; using Microsoft.AspNetCore.Authorization; using Microsoft.AspNetCore.DataProtection; -using Microsoft.AspNetCore.HttpOverrides; using Microsoft.AspNetCore.Rewrite; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Caching.Memory; @@ -24,6 +23,7 @@ using Viper; using Viper.Classes; using Viper.Classes.HealthChecks; +using Viper.Classes.Scheduler; using Viper.Classes.SQLContext; using Web; using Web.Authorization; @@ -80,39 +80,9 @@ logger.Fatal(ex, "Failed to get secrets from AWS"); } - //Use forwarded for headers on test and prod. UseForwardedHeaders is - //only enabled outside Development (see below), so skip the cloudflare.com - //fetch in dev to avoid a network call on every local startup. - if (!builder.Environment.IsDevelopment()) - { - var cloudflareCidrs = CloudflareNetworks.FetchOrFallback(logger); - builder.Services.Configure(options => - { - options.ForwardedHeaders = - ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto; - options.KnownProxies.Add(IPAddress.Parse("192.168.56.134")); //The F5's internal IP - - // Cloudflare fronts vetmed.ucdavis.edu. The chain is - // User -> Cloudflare -> F5 -> app, so the middleware must walk two - // proxy hops to land on the real client IP. Default ForwardLimit - // is 1, which stops at the CF edge - bump to 2. - options.ForwardLimit = 2; - foreach (var cidr in cloudflareCidrs) - { - // cidrs come from cloudflare.com (or our hardcoded fallback). 
A - // single malformed entry in the live response shouldn't crash - // startup - skip it and keep the rest of the allowlist. - try - { - options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse(cidr)); - } - catch (FormatException ex) - { - logger.Warn(ex, "Skipping invalid Cloudflare CIDR: {Cidr}", cidr); - } - } - }); - } + // Forwarded-headers wiring (Cloudflare + F5 trusted proxies). No-op + // in Development. See ForwardedHeadersExtensions. + builder.Services.AddViperForwardedHeaders(builder.Environment, logger); // Add services to the container. builder.Services.AddControllersWithViews(options => @@ -310,6 +280,9 @@ void RegisterDbContext(string connectionStringKey) where TContext : Db // All health-check DI wiring lives in HealthCheckExtensions. builder.Services.AddViperHealthChecks(builder.Configuration, builder.Environment); + // Hangfire scheduler. No-op when Hangfire:Enabled is false. + builder.Services.AddViperHangfire(builder.Configuration, logger); + // Add HttpClient for Vite proxy (development only) if (builder.Environment.IsDevelopment()) { @@ -415,6 +388,11 @@ void RegisterDbContext(string connectionStringKey) where TContext : Db } + // Re-execute bare status-code responses (403, 404, etc.) through HomeController.Error + // so middleware that writes raw status codes — e.g. Hangfire's dashboard middleware + // when our auth filter denies — gets the same Razor error view as the rest of the app. + app.UseStatusCodePagesWithReExecute("/Error/{0}"); + // In development, set up Vite proxy BEFORE rewrite rules so it can handle .ts/.js files if (app.Environment.IsDevelopment()) { @@ -507,6 +485,9 @@ void RegisterDbContext(string connectionStringKey) where TContext : Db // All health-check pipeline wiring lives in HealthCheckExtensions. app.UseViperHealthChecks(); + // Hangfire dashboard. No-op unless AddViperHangfire actually registered. 
+ app.UseViperHangfire(); + // Define the default route mapping and require authentication by default (fail safe) app.MapControllerRoute( name: "areas", diff --git a/web/Views/Shared/Components/MainNav/MainNav.cs b/web/Views/Shared/Components/MainNav/MainNav.cs index 9021575f6..79a9bb525 100644 --- a/web/Views/Shared/Components/MainNav/MainNav.cs +++ b/web/Views/Shared/Components/MainNav/MainNav.cs @@ -64,6 +64,7 @@ public async Task InvokeAsync(AaudUser user) { "raps" => "Computing", "policy" => "Policies", + "scheduler" => "Computing", _ => "VIPER Home", }; return await Task.Run(() => View("Default", user)); diff --git a/web/Viper.csproj b/web/Viper.csproj index 13f079499..bd9188fef 100644 --- a/web/Viper.csproj +++ b/web/Viper.csproj @@ -49,6 +49,10 @@ + + + + @@ -89,6 +93,8 @@ + + diff --git a/web/appsettings.Development.json b/web/appsettings.Development.json index a435e7a79..21bc6cff8 100644 --- a/web/appsettings.Development.json +++ b/web/appsettings.Development.json @@ -22,6 +22,9 @@ "Cas": { "CasBaseUrl": "https://ssodev.ucdavis.edu/cas/" }, + "Hangfire": { + "AutoSchedule": false + }, "EmailSettings": { "SmtpHost": "localhost", "SmtpPort": 1025, diff --git a/web/appsettings.Production.json b/web/appsettings.Production.json index a62a561aa..3ac032657 100644 --- a/web/appsettings.Production.json +++ b/web/appsettings.Production.json @@ -25,5 +25,8 @@ "DefaultFromAddress": "svmithelp@ucdavis.edu", "UseMailpit": false, "BaseUrl": "https://viper.vetmed.ucdavis.edu/2" + }, + "Hangfire": { + "DashboardAppPath": "/2/Computing" } } diff --git a/web/appsettings.Test.json b/web/appsettings.Test.json index 1e969e14e..1eb01c026 100644 --- a/web/appsettings.Test.json +++ b/web/appsettings.Test.json @@ -33,5 +33,8 @@ "AWS": { "ProfilesLocation": "P:\\viper.net\\awscredentials", "Profile": "default" + }, + "Hangfire": { + "DashboardAppPath": "/2/Computing" } } diff --git a/web/appsettings.json b/web/appsettings.json index a0e27d6a4..39f5bbfe9 100644 --- 
a/web/appsettings.json +++ b/web/appsettings.json @@ -35,5 +35,9 @@ "EffortSettings": { "VerificationEmailSubject": "Action required, timely ask - Effort data verification", "VerificationReplyDays": 7 + }, + "Hangfire": { + // Hangfire's tables live in ConnectionStrings:VIPER. + "Enabled": true } }