From 764e56b9a5d9d55ca5c453772f2af0fee39a98a5 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Tue, 9 Jun 2026 23:16:08 +0800 Subject: [PATCH 1/2] fix(query_incidents): make the since/until time window optional "Find current open incidents" with no time window is a common, reasonable ask, but query_incidents hard-required both since and until and rejected it with "both since and until are required" (a failure mode the code itself flagged in a comment). Now: both omitted -> default to the last 30 days (under the backend's 31-day span cap); only `until` given -> still errors; a bare `since` keeps the existing "until defaults to now" behavior. The shared validateTimeWindow is unchanged, so query_changes is unaffected. --- pkg/flashduty/incidents.go | 44 ++++++- pkg/flashduty/incidents_window_test.go | 169 +++++++++++++++++++++++++ pkg/flashduty/time_args.go | 22 ++++ 3 files changed, 233 insertions(+), 2 deletions(-) create mode 100644 pkg/flashduty/incidents_window_test.go diff --git a/pkg/flashduty/incidents.go b/pkg/flashduty/incidents.go index 8f06748..87cc25d 100644 --- a/pkg/flashduty/incidents.go +++ b/pkg/flashduty/incidents.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strings" + "time" flashduty "github.com/flashcatcloud/go-flashduty" "github.com/mark3labs/mcp-go/mcp" @@ -18,6 +19,28 @@ const defaultQueryLimit = 20 const queryIncidentsDescription = `Query incidents by IDs, short ids (nums), time range, status, severity, channel, or free-text query. Returns the incident list with an alerts_total count per incident; for the actual alert objects of one or more incidents, call query_incident_alerts(incident_ids=...).` +// incidentSinceDescription / incidentUntilDescription override the shared +// SinceDescription / UntilDescription for query_incidents, where the window is +// optional: omit both bounds to query "current" open incidents and the tool +// defaults to the last 30 days. (query_changes keeps the shared wording.) +const ( + incidentSinceDescription = "OPTIONAL lower bound of the query window. " + + "Omit BOTH since and until to default to the last 30 days — the natural " + + "choice for \"current\" / open incidents. " + + "PREFER relative durations like \"24h\", \"7d\", \"30m\" — they are anchored " + + "to server time and immune to your training-data cutoff. " + + "Use absolute dates (\"2026-04-01\" or \"2026-04-01 10:00:00\") ONLY when " + + "the user explicitly asked for a specific calendar date; double-check the " + + "year, since picking the wrong year returns silently incorrect data. " + + "Also accepts unix seconds (\"1712000000\") and \"now\". " + + "Max window (until - since): 31 days. Data older than ~90 days may have been purged." + + incidentUntilDescription = "OPTIONAL upper bound of the query window. Same formats as `since`, plus " + + "future durations like \"+24h\", \"+7d\". Defaults to \"now\" when omitted. " + + "Omit BOTH since and until to default to the last 30 days. " + + "Must be greater than `since` and within 31 days of it." +) + // QueryIncidents creates a tool to query incidents with enriched data func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { return mcp.NewTool("query_incidents", @@ -30,8 +53,8 @@ func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHe mcp.WithString("progress", mcp.Description("Filter by status. Valid values: Triggered, Processing, Closed. Comma-separated for multiple."), mcp.Enum("Triggered", "Processing", "Closed", "Triggered,Processing", "Processing,Closed", "Triggered,Closed", "Triggered,Processing,Closed")), mcp.WithString("severity", mcp.Description("Filter by severity level. Valid values: Info, Warning, Critical."), mcp.Enum("Info", "Warning", "Critical")), mcp.WithString("channel_ids", mcp.Description("Comma-separated collaboration space IDs to filter by. Backend expects an array — singular channel_id is silently ignored.")), - WithSince(), - WithUntil(), + WithSince(mcp.Description(incidentSinceDescription)), + WithUntil(mcp.Description(incidentUntilDescription)), mcp.WithString("query", mcp.Description("Free-text search across title, labels, and content (Doris full-text). A 24-char hex string is resolved as an incident ID; a 6-char string is resolved as an incident num. Prefer this over picking exact filter values when the user gives a fuzzy keyword."), mcp.MaxLength(200)), mcp.WithString("nums", mcp.Description("Comma-separated short incident ids (num — the 6-char id shown in the UI, e.g. 311510). Matched within the since/until window; the backend caps the list span at ~30 days, so incidents older than that must be looked up by their full incident_id.")), mcp.WithNumber("limit", mcp.Description(LimitDescription), mcp.DefaultNumber(20), mcp.Min(1), mcp.Max(100)), @@ -59,6 +82,23 @@ func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHe return mcp.NewToolResultError(fmt.Sprintf("invalid until: %v", err)), nil } + // "current open incidents" with no window is the common ask, so when + // BOTH bounds are omitted default to the last 30 days (under the + // 31-day backend cap) instead of rejecting the call. A bare `until` + // is fine (it documents a "now" default, already applied by + // parseUntilArg); a bare `since` is still a real mistake worth an + // error. parseUntilArg has collapsed a missing `until` into "now", so + // detect omission from the raw args, not the parsed values. + sinceProvided := argProvided(args["since"]) + untilProvided := argProvided(args["until"]) + if !sinceProvided { + if untilProvided { + return mcp.NewToolResultError("`since` is required when `until` is set; omit both to default to the last 30 days, or pass a relative duration like \"30d\""), nil + } + endTime = time.Now().Unix() + startTime = endTime - int64(DefaultIncidentWindow/time.Second) + } + if limit <= 0 { limit = defaultQueryLimit } diff --git a/pkg/flashduty/incidents_window_test.go b/pkg/flashduty/incidents_window_test.go new file mode 100644 index 0000000..4da79f2 --- /dev/null +++ b/pkg/flashduty/incidents_window_test.go @@ -0,0 +1,169 @@ +package flashduty + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + flashduty "github.com/flashcatcloud/go-flashduty" + "github.com/mark3labs/mcp-go/mcp" + + "github.com/flashcatcloud/flashduty-mcp-server/pkg/translations" +) + +// newQueryIncidentsHarness spins up a fake /incident/list backend that records +// the request body, and returns the wired query_incidents handler. +func newQueryIncidentsHarness(t *testing.T, gotBody *map[string]any) (server *httptest.Server, handler func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error)) { + t.Helper() + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewDecoder(r.Body).Decode(gotBody) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"items": []any{}, "total": 0}, + }) + })) + + client, err := flashduty.NewClient("test-key", flashduty.WithBaseURL(ts.URL)) + if err != nil { + t.Fatalf("new go-flashduty client: %v", err) + } + _, h := QueryIncidents(func(ctx context.Context) (context.Context, *Clients, error) { + return ctx, &Clients{New: client}, nil + }, translations.NullTranslationHelper) + return ts, h +} + +func bodyInt(t *testing.T, body map[string]any, key string) int64 { + t.Helper() + v, ok := body[key] + if !ok { + t.Fatalf("request body missing %q; got %#v", key, body) + } + f, ok := v.(float64) // JSON numbers decode to float64 + if !ok { + t.Fatalf("%q = %#v, want a number", key, v) + } + return int64(f) +} + +// TestQueryIncidentsDefaultsWindowWhenOmitted is the regression for the reported +// failure: "find current severe incidents" with severity+progress but NO +// since/until used to be rejected with "both since and until are required". +// It must now succeed and default to the last 30 days. +func TestQueryIncidentsDefaultsWindowWhenOmitted(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + before := time.Now().Unix() + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + // Mirrors the real report: severity + progress, no time window. + "severity": "Critical", + "progress": "Triggered,Processing", + }, + }, + }) + after := time.Now().Unix() + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if result.IsError { + txt, _ := mcp.AsTextContent(result.Content[0]) + t.Fatalf("expected success with defaulted window, got error: %s", txt.Text) + } + + start := bodyInt(t, gotBody, "start_time") + end := bodyInt(t, gotBody, "end_time") + + // end should be ~now. + if end < before || end > after { + t.Errorf("end_time = %d, want within [%d,%d] (now)", end, before, after) + } + // span should be ~30 days (allow a couple seconds of clock drift). + wantSpan := int64(DefaultIncidentWindow / time.Second) + if span := end - start; span < wantSpan-5 || span > wantSpan+5 { + t.Errorf("window span = %ds, want ~%ds (30 days)", span, wantSpan) + } + // the defaulted window must be inside the backend's 31-day cap. + if span := end - start; span > int64(MaxTimeWindow/time.Second) { + t.Errorf("defaulted span %ds exceeds MaxTimeWindow", span) + } +} + +// TestQueryIncidentsOnlyUntilErrors: a one-sided window — only `until` given, +// `since` omitted — is a real mistake, so it must error without hitting the +// backend rather than silently inventing a `since`. (Both-omitted defaults to +// 30d; a bare `since` is fine because `until` defaults to now — see the +// neighbouring tests.) +func TestQueryIncidentsOnlyUntilErrors(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + // only `until` provided, `since` omitted → helpful error, no backend call. + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + "until": "now", + }, + }, + }) + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if !result.IsError { + t.Fatalf("expected an error when only `until` is given, got success") + } + if gotBody != nil { + t.Errorf("backend should not be called on the one-sided-window error; got body %#v", gotBody) + } +} + +// TestQueryIncidentsOnlySinceUsesNowUntil confirms the existing contract is +// preserved: a bare `since` is fine because `until` documents a "now" default. +func TestQueryIncidentsBareSinceDefaultsUntilToNow(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + before := time.Now().Unix() + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + "since": "7d", + }, + }, + }) + after := time.Now().Unix() + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if result.IsError { + txt, _ := mcp.AsTextContent(result.Content[0]) + t.Fatalf("bare `since` should succeed, got error: %s", txt.Text) + } + + end := bodyInt(t, gotBody, "end_time") + if end < before || end > after { + t.Errorf("end_time = %d, want ~now within [%d,%d]", end, before, after) + } + start := bodyInt(t, gotBody, "start_time") + wantStart := before - 7*24*3600 + if start < wantStart-5 || start > wantStart+5 { + t.Errorf("start_time = %d, want ~now-7d (%d)", start, wantStart) + } +} diff --git a/pkg/flashduty/time_args.go b/pkg/flashduty/time_args.go index 0589d45..14e7c83 100644 --- a/pkg/flashduty/time_args.go +++ b/pkg/flashduty/time_args.go @@ -14,6 +14,11 @@ import ( // so the LLM gets a guided error before the round-trip. const MaxTimeWindow = 31 * 24 * time.Hour +// DefaultIncidentWindow is the lookback query_incidents applies when the caller +// omits both since and until ("current open incidents" with no explicit range). +// Kept under MaxTimeWindow so the defaulted span never trips the backend cap. +const DefaultIncidentWindow = 30 * 24 * time.Hour + // SinceDescription / UntilDescription are reused across query_incidents // and query_changes. The wording is tuned for LLM callers that // otherwise pick absolute dates from stale training data and silently query @@ -43,6 +48,23 @@ func WithUntil(opts ...mcp.PropertyOption) mcp.ToolOption { return mcp.WithString("until", append([]mcp.PropertyOption{mcp.Description(UntilDescription)}, opts...)...) } +// argProvided reports whether an MCP argument was actually supplied with a +// usable value. It mirrors timeutil.ParseAny's "not provided" semantics: a +// missing key (nil) or an empty string both count as omitted. Callers use this +// to distinguish "the user left this out" from a parsed zero — important for +// since/until, where parseUntilArg collapses a missing `until` into "now" and +// thereby loses the omission signal. +func argProvided(v any) bool { + switch x := v.(type) { + case nil: + return false + case string: + return x != "" + default: + return true + } +} + // parseUntilArg parses an "until" argument, defaulting to "now" when missing // or empty. UntilDescription advertises this default, but timeutil.ParseAny // on nil returns 0 (its sentinel for "not provided"), so callers must resolve From a787f539eeda50188cf064f0094ad1177c8335ed Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 10 Jun 2026 10:21:22 +0800 Subject: [PATCH 2/2] refactor(query_incidents): compose window desc from shared constant The per-tool since/until descriptions duplicated the whole shared date-format guidance just to prepend OPTIONAL, which would drift from SinceDescription/UntilDescription over time. The only tool-specific fact is the both-omitted 30-day default, so compose incidentSinceDescription from the shared SinceDescription + that one sentence, and revert until to the shared WithUntil() (it already documents its now default). Behavior is unchanged; the handler default is what actually fixes the bug. --- pkg/flashduty/incidents.go | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/pkg/flashduty/incidents.go b/pkg/flashduty/incidents.go index 87cc25d..b082e00 100644 --- a/pkg/flashduty/incidents.go +++ b/pkg/flashduty/incidents.go @@ -19,27 +19,13 @@ const defaultQueryLimit = 20 const queryIncidentsDescription = `Query incidents by IDs, short ids (nums), time range, status, severity, channel, or free-text query. Returns the incident list with an alerts_total count per incident; for the actual alert objects of one or more incidents, call query_incident_alerts(incident_ids=...).` -// incidentSinceDescription / incidentUntilDescription override the shared -// SinceDescription / UntilDescription for query_incidents, where the window is -// optional: omit both bounds to query "current" open incidents and the tool -// defaults to the last 30 days. (query_changes keeps the shared wording.) -const ( - incidentSinceDescription = "OPTIONAL lower bound of the query window. " + - "Omit BOTH since and until to default to the last 30 days — the natural " + - "choice for \"current\" / open incidents. " + - "PREFER relative durations like \"24h\", \"7d\", \"30m\" — they are anchored " + - "to server time and immune to your training-data cutoff. " + - "Use absolute dates (\"2026-04-01\" or \"2026-04-01 10:00:00\") ONLY when " + - "the user explicitly asked for a specific calendar date; double-check the " + - "year, since picking the wrong year returns silently incorrect data. " + - "Also accepts unix seconds (\"1712000000\") and \"now\". " + - "Max window (until - since): 31 days. Data older than ~90 days may have been purged." - - incidentUntilDescription = "OPTIONAL upper bound of the query window. Same formats as `since`, plus " + - "future durations like \"+24h\", \"+7d\". Defaults to \"now\" when omitted. " + - "Omit BOTH since and until to default to the last 30 days. " + - "Must be greater than `since` and within 31 days of it." -) +// incidentSinceDescription extends the shared SinceDescription with +// query_incidents' optional-window behavior: omit BOTH bounds to query +// "current" / open incidents and the tool defaults to the last 30 days. +// (query_changes keeps the shared wording, where the window is mandatory.) +// Composed from the shared constant so the date-format guidance can't drift. +const incidentSinceDescription = SinceDescription + + " You may omit BOTH since and until to query current/open incidents; the tool then defaults to the last 30 days." // QueryIncidents creates a tool to query incidents with enriched data func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { @@ -54,7 +40,7 @@ func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHe mcp.WithString("severity", mcp.Description("Filter by severity level. Valid values: Info, Warning, Critical."), mcp.Enum("Info", "Warning", "Critical")), mcp.WithString("channel_ids", mcp.Description("Comma-separated collaboration space IDs to filter by. Backend expects an array — singular channel_id is silently ignored.")), WithSince(mcp.Description(incidentSinceDescription)), - WithUntil(mcp.Description(incidentUntilDescription)), + WithUntil(), mcp.WithString("query", mcp.Description("Free-text search across title, labels, and content (Doris full-text). A 24-char hex string is resolved as an incident ID; a 6-char string is resolved as an incident num. Prefer this over picking exact filter values when the user gives a fuzzy keyword."), mcp.MaxLength(200)), mcp.WithString("nums", mcp.Description("Comma-separated short incident ids (num — the 6-char id shown in the UI, e.g. 311510). Matched within the since/until window; the backend caps the list span at ~30 days, so incidents older than that must be looked up by their full incident_id.")), mcp.WithNumber("limit", mcp.Description(LimitDescription), mcp.DefaultNumber(20), mcp.Min(1), mcp.Max(100)),