diff --git a/pkg/flashduty/incidents.go b/pkg/flashduty/incidents.go index 8f06748..b082e00 100644 --- a/pkg/flashduty/incidents.go +++ b/pkg/flashduty/incidents.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "strings" + "time" flashduty "github.com/flashcatcloud/go-flashduty" "github.com/mark3labs/mcp-go/mcp" @@ -18,6 +19,14 @@ const defaultQueryLimit = 20 const queryIncidentsDescription = `Query incidents by IDs, short ids (nums), time range, status, severity, channel, or free-text query. Returns the incident list with an alerts_total count per incident; for the actual alert objects of one or more incidents, call query_incident_alerts(incident_ids=...).` +// incidentSinceDescription extends the shared SinceDescription with +// query_incidents' optional-window behavior: omit BOTH bounds to query +// "current" / open incidents and the tool defaults to the last 30 days. +// (query_changes keeps the shared wording, where the window is mandatory.) +// Composed from the shared constant so the date-format guidance can't drift. +const incidentSinceDescription = SinceDescription + + " You may omit BOTH since and until to query current/open incidents; the tool then defaults to the last 30 days." + // QueryIncidents creates a tool to query incidents with enriched data func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHelperFunc) (tool mcp.Tool, handler server.ToolHandlerFunc) { return mcp.NewTool("query_incidents", @@ -30,7 +39,7 @@ func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHe mcp.WithString("progress", mcp.Description("Filter by status. Valid values: Triggered, Processing, Closed. Comma-separated for multiple."), mcp.Enum("Triggered", "Processing", "Closed", "Triggered,Processing", "Processing,Closed", "Triggered,Closed", "Triggered,Processing,Closed")), mcp.WithString("severity", mcp.Description("Filter by severity level. Valid values: Info, Warning, Critical."), mcp.Enum("Info", "Warning", "Critical")), mcp.WithString("channel_ids", mcp.Description("Comma-separated collaboration space IDs to filter by. Backend expects an array — singular channel_id is silently ignored.")), - WithSince(), + WithSince(mcp.Description(incidentSinceDescription)), WithUntil(), mcp.WithString("query", mcp.Description("Free-text search across title, labels, and content (Doris full-text). A 24-char hex string is resolved as an incident ID; a 6-char string is resolved as an incident num. Prefer this over picking exact filter values when the user gives a fuzzy keyword."), mcp.MaxLength(200)), mcp.WithString("nums", mcp.Description("Comma-separated short incident ids (num — the 6-char id shown in the UI, e.g. 311510). Matched within the since/until window; the backend caps the list span at ~30 days, so incidents older than that must be looked up by their full incident_id.")), @@ -59,6 +68,23 @@ func QueryIncidents(getClient GetFlashdutyClientFn, t translations.TranslationHe return mcp.NewToolResultError(fmt.Sprintf("invalid until: %v", err)), nil } + // "current open incidents" with no window is the common ask, so when + // BOTH bounds are omitted default to the last 30 days (under the + // 31-day backend cap) instead of rejecting the call. A bare `until` + // is fine (it documents a "now" default, already applied by + // parseUntilArg); a bare `since` is still a real mistake worth an + // error. parseUntilArg has collapsed a missing `until` into "now", so + // detect omission from the raw args, not the parsed values. + sinceProvided := argProvided(args["since"]) + untilProvided := argProvided(args["until"]) + if !sinceProvided { + if untilProvided { + return mcp.NewToolResultError("`since` is required when `until` is set; omit both to default to the last 30 days, or pass a relative duration like \"30d\""), nil + } + endTime = time.Now().Unix() + startTime = endTime - int64(DefaultIncidentWindow/time.Second) + } + if limit <= 0 { limit = defaultQueryLimit } diff --git a/pkg/flashduty/incidents_window_test.go b/pkg/flashduty/incidents_window_test.go new file mode 100644 index 0000000..4da79f2 --- /dev/null +++ b/pkg/flashduty/incidents_window_test.go @@ -0,0 +1,169 @@ +package flashduty + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + flashduty "github.com/flashcatcloud/go-flashduty" + "github.com/mark3labs/mcp-go/mcp" + + "github.com/flashcatcloud/flashduty-mcp-server/pkg/translations" +) + +// newQueryIncidentsHarness spins up a fake /incident/list backend that records +// the request body, and returns the wired query_incidents handler. +func newQueryIncidentsHarness(t *testing.T, gotBody *map[string]any) (server *httptest.Server, handler func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error)) { + t.Helper() + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = json.NewDecoder(r.Body).Decode(gotBody) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "data": map[string]any{"items": []any{}, "total": 0}, + }) + })) + + client, err := flashduty.NewClient("test-key", flashduty.WithBaseURL(ts.URL)) + if err != nil { + t.Fatalf("new go-flashduty client: %v", err) + } + _, h := QueryIncidents(func(ctx context.Context) (context.Context, *Clients, error) { + return ctx, &Clients{New: client}, nil + }, translations.NullTranslationHelper) + return ts, h +} + +func bodyInt(t *testing.T, body map[string]any, key string) int64 { + t.Helper() + v, ok := body[key] + if !ok { + t.Fatalf("request body missing %q; got %#v", key, body) + } + f, ok := v.(float64) // JSON numbers decode to float64 + if !ok { + t.Fatalf("%q = %#v, want a number", key, v) + } + return int64(f) +} + +// TestQueryIncidentsDefaultsWindowWhenOmitted is the regression for the reported +// failure: "find current severe incidents" with severity+progress but NO +// since/until used to be rejected with "both since and until are required". +// It must now succeed and default to the last 30 days. +func TestQueryIncidentsDefaultsWindowWhenOmitted(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + before := time.Now().Unix() + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + // Mirrors the real report: severity + progress, no time window. + "severity": "Critical", + "progress": "Triggered,Processing", + }, + }, + }) + after := time.Now().Unix() + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if result.IsError { + txt, _ := mcp.AsTextContent(result.Content[0]) + t.Fatalf("expected success with defaulted window, got error: %s", txt.Text) + } + + start := bodyInt(t, gotBody, "start_time") + end := bodyInt(t, gotBody, "end_time") + + // end should be ~now. + if end < before || end > after { + t.Errorf("end_time = %d, want within [%d,%d] (now)", end, before, after) + } + // span should be ~30 days (allow a couple seconds of clock drift). + wantSpan := int64(DefaultIncidentWindow / time.Second) + if span := end - start; span < wantSpan-5 || span > wantSpan+5 { + t.Errorf("window span = %ds, want ~%ds (30 days)", span, wantSpan) + } + // the defaulted window must be inside the backend's 31-day cap. + if span := end - start; span > int64(MaxTimeWindow/time.Second) { + t.Errorf("defaulted span %ds exceeds MaxTimeWindow", span) + } +} + +// TestQueryIncidentsOnlyUntilErrors: a one-sided window — only `until` given, +// `since` omitted — is a real mistake, so it must error without hitting the +// backend rather than silently inventing a `since`. (Both-omitted defaults to +// 30d; a bare `since` is fine because `until` defaults to now — see the +// neighbouring tests.) +func TestQueryIncidentsOnlyUntilErrors(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + // only `until` provided, `since` omitted → helpful error, no backend call. + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + "until": "now", + }, + }, + }) + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if !result.IsError { + t.Fatalf("expected an error when only `until` is given, got success") + } + if gotBody != nil { + t.Errorf("backend should not be called on the one-sided-window error; got body %#v", gotBody) + } +} + +// TestQueryIncidentsOnlySinceUsesNowUntil confirms the existing contract is +// preserved: a bare `since` is fine because `until` documents a "now" default. +func TestQueryIncidentsBareSinceDefaultsUntilToNow(t *testing.T) { + t.Parallel() + + var gotBody map[string]any + ts, handler := newQueryIncidentsHarness(t, &gotBody) + defer ts.Close() + + before := time.Now().Unix() + result, err := handler(context.Background(), mcp.CallToolRequest{ + Params: mcp.CallToolParams{ + Name: "query_incidents", + Arguments: map[string]any{ + "since": "7d", + }, + }, + }) + after := time.Now().Unix() + if err != nil { + t.Fatalf("handler returned error: %v", err) + } + if result.IsError { + txt, _ := mcp.AsTextContent(result.Content[0]) + t.Fatalf("bare `since` should succeed, got error: %s", txt.Text) + } + + end := bodyInt(t, gotBody, "end_time") + if end < before || end > after { + t.Errorf("end_time = %d, want ~now within [%d,%d]", end, before, after) + } + start := bodyInt(t, gotBody, "start_time") + wantStart := before - 7*24*3600 + if start < wantStart-5 || start > wantStart+5 { + t.Errorf("start_time = %d, want ~now-7d (%d)", start, wantStart) + } +} diff --git a/pkg/flashduty/time_args.go b/pkg/flashduty/time_args.go index 0589d45..14e7c83 100644 --- a/pkg/flashduty/time_args.go +++ b/pkg/flashduty/time_args.go @@ -14,6 +14,11 @@ import ( // so the LLM gets a guided error before the round-trip. const MaxTimeWindow = 31 * 24 * time.Hour +// DefaultIncidentWindow is the lookback query_incidents applies when the caller +// omits both since and until ("current open incidents" with no explicit range). +// Kept under MaxTimeWindow so the defaulted span never trips the backend cap. +const DefaultIncidentWindow = 30 * 24 * time.Hour + // SinceDescription / UntilDescription are reused across query_incidents // and query_changes. The wording is tuned for LLM callers that // otherwise pick absolute dates from stale training data and silently query @@ -43,6 +48,23 @@ func WithUntil(opts ...mcp.PropertyOption) mcp.ToolOption { return mcp.WithString("until", append([]mcp.PropertyOption{mcp.Description(UntilDescription)}, opts...)...) } +// argProvided reports whether an MCP argument was actually supplied with a +// usable value. It mirrors timeutil.ParseAny's "not provided" semantics: a +// missing key (nil) or an empty string both count as omitted. Callers use this +// to distinguish "the user left this out" from a parsed zero — important for +// since/until, where parseUntilArg collapses a missing `until` into "now" and +// thereby loses the omission signal. +func argProvided(v any) bool { + switch x := v.(type) { + case nil: + return false + case string: + return x != "" + default: + return true + } +} + // parseUntilArg parses an "until" argument, defaulting to "now" when missing // or empty. UntilDescription advertises this default, but timeutil.ParseAny // on nil returns 0 (its sentinel for "not provided"), so callers must resolve