Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions examples/contrib-sli-total-amount-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Example PrometheusServiceLevel spec that enables the SLI total amount contrib
# plugin through the sloPlugins chain.
apiVersion: sloth.slok.dev/v1
kind: PrometheusServiceLevel
metadata:
name: svc
namespace: test-ns
spec:
service: "svc01"
labels:
global01k1: global01v1
# The plugin is referenced by its ID in the SLO plugin chain.
sloPlugins:
chain:
- id: "sloth.dev/contrib/sli_total_amount/v1"
slos:
- name: "slo1"
objective: 99.9
description: "This is SLO 01."
labels:
global02k1: global02v1
sli:
events:
errorQuery: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))
# The plugin returns an error for SLOs without an events totalQuery.
totalQuery: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))
alerting:
name: myServiceAlert
labels:
alert01k1: "alert01v1"
annotations:
alert02k1: "alert02k2"
pageAlert:
labels:
alert03k1: "alert03v1"
ticketAlert:
labels:
alert04k1: "alert04v1"
23 changes: 23 additions & 0 deletions internal/plugin/slo/contrib/sli_total_amount_rules_v1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SLI Total Amount Rules Plugin for Sloth

This plugin additionally generates Prometheus recording rules for the total SLI amount, preserving the `TotalQuery` from the SLO spec. It is designed to be used as an SLO plugin in Sloth's plugin chain and records one rule per time window, named `slo:sli_total:amount<window>`.

## Features
- Generates a Prometheus rule group for the SLI total amount per SLO.
- Ensures unique rule group names to avoid conflicts (e.g., `sloth-slo-sli-total-amount-<slo-id>`).
- Preserves the original `TotalQuery` from the SLO definition.

## Usage example

Add the plugin to the `sloPlugins.chain` section of your SLO YAML:

```yaml
sloPlugins:
chain:
- id: "sloth.dev/contrib/sli_total_amount/v1"
```

## License

This plugin is licensed under the Apache 2.0 License. See [LICENSE](../../../../LICENSE) for details.

93 changes: 93 additions & 0 deletions internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package plugin

import (
"bytes"
"context"
"encoding/json"
"fmt"
"text/template"

"github.com/prometheus/prometheus/model/rulefmt"
"github.com/slok/sloth/pkg/common/conventions"
"github.com/slok/sloth/pkg/common/model"
utilsdata "github.com/slok/sloth/pkg/common/utils/data"
promutils "github.com/slok/sloth/pkg/common/utils/prometheus"
pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
)

// Plugin identity and naming constants.
const (
	// PluginVersion is the version string exported by this plugin.
	// NOTE(review): presumably the plugin API version checked by the loader — confirm.
	PluginVersion = "prometheus/slo/v1"
	// PluginID is the identifier used to reference this plugin in an SLO plugin chain.
	PluginID = "sloth.dev/contrib/sli_total_amount/v1"
	// sliTotalAmountMetric is the record name prefix; the window string is appended to it.
	sliTotalAmountMetric = "slo:sli_total:amount"
	// sliTotalAmountGroupNamePrefix is prepended to the SLO ID to name the generated rule group.
	sliTotalAmountGroupNamePrefix = "sloth-slo-sli-total-amount-"
)

// PluginConfig is the plugin configuration decoded from the raw JSON given to
// NewPlugin. It currently declares no options.
type PluginConfig struct{}

// NewPlugin builds the plugin from its raw JSON configuration.
//
// The plugin declares no options, so an empty (or whitespace-only) configuration
// is accepted and treated as the zero PluginConfig. A non-empty configuration
// must be valid JSON for PluginConfig; otherwise the decoding error is returned
// wrapped with context.
func NewPlugin(c json.RawMessage, _ pluginslov1.AppUtils) (pluginslov1.Plugin, error) {
	cfg := PluginConfig{}

	// json.Unmarshal fails on empty input ("unexpected end of JSON input"), so
	// only decode when some configuration was actually provided.
	if len(bytes.TrimSpace(c)) > 0 {
		if err := json.Unmarshal(c, &cfg); err != nil {
			return nil, fmt.Errorf("could not unmarshal plugin config: %w", err)
		}
	}

	return plugin{cfg: cfg}, nil
}

// plugin is the value returned by NewPlugin as the SLO plugin implementation.
type plugin struct {
	// cfg is the configuration decoded in NewPlugin.
	cfg PluginConfig
}

// ProcessSLO generates the SLI total amount recording rules for the SLO in the
// request and appends them, as one extra rule group, to the result.
//
// It returns an error when the SLO has no events SLI or its TotalQuery is
// empty, and propagates any error from rendering the rules.
func (p plugin) ProcessSLO(ctx context.Context, request *pluginslov1.Request, result *pluginslov1.Result) error {
	events := request.SLO.SLI.Events
	if events == nil || events.TotalQuery == "" {
		return fmt.Errorf("SLI event type with TotalQuery required")
	}

	totalRules, err := p.generateSLITotalRecordingRules(ctx, request.SLO, request.MWMBAlertGroup)
	if err != nil {
		return err
	}

	// The group name embeds the SLO ID; Interval is left at its zero value.
	result.SLORules.ExtraRules = append(result.SLORules.ExtraRules, model.PromRuleGroup{
		Name:  sliTotalAmountGroupNamePrefix + request.SLO.ID,
		Rules: totalRules,
	})

	return nil
}

// generateSLITotalRecordingRules renders the SLO's TotalQuery once per window
// and returns one recording rule per window.
//
// The windows are the ones returned by alerts.TimeDurationWindows() followed by
// the SLO's own time window. Each rule is recorded as sliTotalAmountMetric with
// the window string appended, and is labeled with the SLO ID labels merged with
// the SLO labels.
//
// It returns an error if the SLO has no events SLI, if TotalQuery is not a
// valid template, or if rendering fails (for example because the query
// references a variable other than the window one).
func (p plugin) generateSLITotalRecordingRules(ctx context.Context, slo model.PromSLO, alerts model.MWMBAlertGroup) ([]rulefmt.Rule, error) {
	if slo.SLI.Events == nil {
		return nil, fmt.Errorf("SLI event type with TotalQuery required")
	}

	// The template text does not depend on the window, so parse it once
	// instead of on every loop iteration.
	tpl, err := template.New("totalQuery").Option("missingkey=error").Parse(slo.SLI.Events.TotalQuery)
	if err != nil {
		return nil, fmt.Errorf("could not create template for %s: %w", sliTotalAmountMetric, err)
	}

	windows := alerts.TimeDurationWindows()
	windows = append(windows, slo.TimeWindow)

	rules := make([]rulefmt.Rule, 0, len(windows))
	for _, window := range windows {
		windowStr := promutils.TimeDurationToPromStr(window)
		recordName := sliTotalAmountMetric + windowStr

		var buf bytes.Buffer
		err := tpl.Execute(&buf, map[string]string{
			conventions.TplSLIQueryWindowVarName: windowStr,
		})
		if err != nil {
			return nil, fmt.Errorf("could not render TotalQuery for %s: %w", recordName, err)
		}

		rules = append(rules, rulefmt.Rule{
			Record: recordName,
			Expr:   buf.String(),
			// Merge per rule so the rules do not all reference one shared labels map.
			Labels: utilsdata.MergeLabels(conventions.GetSLOIDPromLabels(slo), slo.Labels),
		})
	}

	return rules, nil
}
103 changes: 103 additions & 0 deletions internal/plugin/slo/contrib/sli_total_amount_rules_v1/plugin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package plugin_test

import (
"encoding/json"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

plugin "github.com/slok/sloth/internal/plugin/slo/contrib/sli_total_amount_rules_v1"
"github.com/slok/sloth/pkg/common/model"
pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
)

// baseAlertGroup returns an alert group fixture in which only PageQuick is set,
// with a 5 minute short window and a 1 hour long window.
func baseAlertGroup() model.MWMBAlertGroup {
	var group model.MWMBAlertGroup
	group.PageQuick = model.MWMBAlert{
		ShortWindow: 5 * time.Minute,
		LongWindow:  time.Hour,
	}
	return group
}

// SLOOption mutates the SLO fixture built by baseSLO before it is returned.
type SLOOption func(*model.PromSLO)

// baseSLO returns the SLO fixture used by the tests: an events SLI with only
// ErrorQuery set, a 30 day time window and two labels. The given options are
// applied in order on top of that base.
func baseSLO(opts ...SLOOption) model.PromSLO {
	events := &model.PromSLIEvents{
		ErrorQuery: `sum(rate(http_requests_total{job="api",status=~"5.."}[{{.window}}]))`,
	}
	labels := map[string]string{
		"global01k1": "global01v1",
		"global02k1": "global02v1",
	}

	fixture := model.PromSLO{
		ID:         "svc01-slo1",
		Name:       "slo1",
		Service:    "svc01",
		TimeWindow: 30 * 24 * time.Hour,
		SLI:        model.PromSLI{Events: events},
		Labels:     labels,
	}

	for i := range opts {
		opts[i](&fixture)
	}

	return fixture
}

// withTotalQuery returns an option that sets the events TotalQuery of the SLO
// to a fixed templated query.
func withTotalQuery() SLOOption {
	const totalQuery = `sum(rate(http_requests_total{job="api"}[{{.window}}]))`

	return func(target *model.PromSLO) {
		target.SLI.Events.TotalQuery = totalQuery
	}
}

// TestProcessSLO_NoRules checks that processing an SLO without a TotalQuery
// returns an error and leaves ExtraRules empty.
func TestProcessSLO_NoRules(t *testing.T) {
	cfgBytes, err := json.Marshal(plugin.PluginConfig{})
	require.NoError(t, err)

	plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{})
	require.NoError(t, err)

	req := &pluginslov1.Request{
		SLO:            baseSLO(),
		MWMBAlertGroup: baseAlertGroup(),
	}
	result := &pluginslov1.Result{}

	err = plug.ProcessSLO(t.Context(), req, result)
	require.Error(t, err)

	// The failure message must describe what is asserted: nothing was appended.
	assert.Empty(t, result.SLORules.ExtraRules, "expected no rule groups in ExtraRules when TotalQuery is missing")
}

// TestProcessSLO_AppendsCustomRuleGroup checks that processing an SLO with a
// TotalQuery appends a rule group named after the SLO ID, and that every rule
// in it has the expected record name prefix and rendered query.
func TestProcessSLO_AppendsCustomRuleGroup(t *testing.T) {
	cfgBytes, err := json.Marshal(plugin.PluginConfig{})
	require.NoError(t, err)

	plug, err := plugin.NewPlugin(cfgBytes, pluginslov1.AppUtils{})
	require.NoError(t, err)

	req := &pluginslov1.Request{
		SLO:            baseSLO(withTotalQuery()),
		MWMBAlertGroup: baseAlertGroup(),
	}
	result := &pluginslov1.Result{}

	err = plug.ProcessSLO(t.Context(), req, result)
	require.NoError(t, err)

	// Use require (fatal) before indexing so an empty slice fails the test
	// cleanly instead of panicking with an index out of range.
	require.NotEmpty(t, result.SLORules.ExtraRules, "expected at least one rule group in ExtraRules")
	group := result.SLORules.ExtraRules[0]
	assert.Equal(t, "sloth-slo-sli-total-amount-svc01-slo1", group.Name)

	require.NotEmpty(t, group.Rules, "expected at least one rule in the group")
	for _, rule := range group.Rules {
		assert.Contains(t, rule.Record, "slo:sli_total:amount")
		assert.Contains(t, rule.Expr, "sum(rate(http_requests_total")
	}
}