Skip to content

Commit 0000f05

Browse files
feat: adds various OCI metrics (#25493)
Signed-off-by: Patroklos Papapetrou <[email protected]>
1 parent cc57831 commit 0000f05

File tree

7 files changed

+367
-33
lines changed

7 files changed

+367
-33
lines changed

docs/operator-manual/metrics.md

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -255,14 +255,21 @@ Metrics about the Repo Server. The gRPC metrics are not exposed by default. Met
255255
Scraped at the `argocd-repo-server:8084/metrics` endpoint.
256256

257257

258-
| Metric | Type | Description |
259-
| --------------------------------------- | :-------: | ------------------------------------------------------------------------- |
260-
| `argocd_git_request_duration_seconds` | histogram | Git requests duration seconds. |
261-
| `argocd_git_request_total` | counter | Number of git requests performed by repo server |
262-
| `argocd_git_fetch_fail_total` | counter | Number of git fetch requests failures by repo server |
263-
| `argocd_redis_request_duration_seconds` | histogram | Redis requests duration seconds. |
264-
| `argocd_redis_request_total` | counter | Number of Kubernetes requests executed during application reconciliation. |
265-
| `argocd_repo_pending_request_total` | gauge | Number of pending requests requiring repository lock |
258+
| Metric | Type | Description |
259+
|------------------------------------------|:----------:|---------------------------------------------------------------------------|
260+
| `argocd_git_request_duration_seconds` | histogram | Git requests duration seconds. |
261+
| `argocd_git_request_total` | counter | Number of git requests performed by repo server |
262+
| `argocd_git_fetch_fail_total` | counter | Number of git fetch requests failures by repo server |
263+
| `argocd_redis_request_duration_seconds` | histogram | Redis requests duration seconds. |
264+
| `argocd_redis_request_total` | counter | Number of Redis requests executed during application reconciliation. |
265+
| `argocd_repo_pending_request_total` | gauge | Number of pending requests requiring repository lock |
266+
| `argocd_oci_request_total` | counter | Number of OCI requests performed by repo server |
267+
| `argocd_oci_request_duration_seconds` | histogram | OCI requests duration seconds. |
268+
| `argocd_oci_test_repo_fail_total` | counter | Number of OCI test repo requests failures by repo server |
269+
| `argocd_oci_get_tags_fail_total` | counter | Number of OCI get tags requests failures by repo server |
270+
| `argocd_oci_digest_metadata_fail_total` | counter | Number of OCI digest metadata failures by repo server |
271+
| `argocd_oci_resolve_revision_fail_total` | counter | Number of OCI resolve revision failures by repo server |
272+
| `argocd_oci_extract_fail_total` | counter | Number of OCI extract requests failures by repo server |
266273

267274
## Commit Server Metrics
268275

reposerver/metrics/metrics.go

Lines changed: 130 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,22 @@ import (
1111
)
1212

1313
type MetricsServer struct {
14-
handler http.Handler
15-
gitFetchFailCounter *prometheus.CounterVec
16-
gitLsRemoteFailCounter *prometheus.CounterVec
17-
gitRequestCounter *prometheus.CounterVec
18-
gitRequestHistogram *prometheus.HistogramVec
19-
repoPendingRequestsGauge *prometheus.GaugeVec
20-
redisRequestCounter *prometheus.CounterVec
21-
redisRequestHistogram *prometheus.HistogramVec
22-
PrometheusRegistry *prometheus.Registry
14+
handler http.Handler
15+
gitFetchFailCounter *prometheus.CounterVec
16+
gitLsRemoteFailCounter *prometheus.CounterVec
17+
gitRequestCounter *prometheus.CounterVec
18+
gitRequestHistogram *prometheus.HistogramVec
19+
repoPendingRequestsGauge *prometheus.GaugeVec
20+
redisRequestCounter *prometheus.CounterVec
21+
redisRequestHistogram *prometheus.HistogramVec
22+
ociExtractFailCounter *prometheus.CounterVec
23+
ociResolveRevisionFailCounter *prometheus.CounterVec
24+
ociDigestMetadataCounter *prometheus.CounterVec
25+
ociGetTagsFailCounter *prometheus.CounterVec
26+
ociTestRepoFailCounter *prometheus.CounterVec
27+
ociRequestCounter *prometheus.CounterVec
28+
ociRequestHistogram *prometheus.HistogramVec
29+
PrometheusRegistry *prometheus.Registry
2330
}
2431

2532
type GitRequestType string
@@ -100,16 +107,87 @@ func NewMetricsServer() *MetricsServer {
100107
)
101108
registry.MustRegister(redisRequestHistogram)
102109

110+
ociExtractFailCounter := prometheus.NewCounterVec(
111+
prometheus.CounterOpts{
112+
Name: "argocd_oci_extract_fail_total",
113+
Help: "Number of OCI extract requests failures by repo server",
114+
},
115+
[]string{"repo", "revision"},
116+
)
117+
registry.MustRegister(ociExtractFailCounter)
118+
119+
ociResolveRevisionFailCounter := prometheus.NewCounterVec(
120+
prometheus.CounterOpts{
121+
Name: "argocd_oci_resolve_revision_fail_total",
122+
Help: "Number of OCI resolve revision requests failures by repo server",
123+
},
124+
[]string{"repo", "revision"},
125+
)
126+
registry.MustRegister(ociResolveRevisionFailCounter)
127+
128+
ociDigestMetadataCounter := prometheus.NewCounterVec(
129+
prometheus.CounterOpts{
130+
Name: "argocd_oci_digest_metadata_fail_total",
131+
Help: "Number of OCI digest metadata requests failures by repo server",
132+
},
133+
[]string{"repo", "revision"},
134+
)
135+
registry.MustRegister(ociDigestMetadataCounter)
136+
137+
ociGetTagsFailCounter := prometheus.NewCounterVec(
138+
prometheus.CounterOpts{
139+
Name: "argocd_oci_get_tags_fail_total",
140+
Help: "Number of OCI get tags failures by repo server",
141+
},
142+
[]string{"repo"},
143+
)
144+
registry.MustRegister(ociGetTagsFailCounter)
145+
146+
ociTestRepoFailCounter := prometheus.NewCounterVec(
147+
prometheus.CounterOpts{
148+
Name: "argocd_oci_test_repo_fail_total",
149+
Help: "Number of OCI test repo requests failures by repo server",
150+
},
151+
[]string{"repo"},
152+
)
153+
registry.MustRegister(ociTestRepoFailCounter)
154+
155+
ociRequestCounter := prometheus.NewCounterVec(
156+
prometheus.CounterOpts{
157+
Name: "argocd_oci_request_total",
158+
Help: "Number of OCI requests performed by repo server",
159+
},
160+
[]string{"repo", "request_type"},
161+
)
162+
registry.MustRegister(ociRequestCounter)
163+
164+
ociRequestHistogram := prometheus.NewHistogramVec(
165+
prometheus.HistogramOpts{
166+
Name: "argocd_oci_request_duration_seconds",
167+
Help: "OCI requests duration seconds.",
168+
Buckets: []float64{0.1, 0.25, .5, 1, 2, 4, 10, 20},
169+
},
170+
[]string{"repo", "request_type"},
171+
)
172+
registry.MustRegister(ociRequestHistogram)
173+
103174
return &MetricsServer{
104-
handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
105-
gitFetchFailCounter: gitFetchFailCounter,
106-
gitLsRemoteFailCounter: gitLsRemoteFailCounter,
107-
gitRequestCounter: gitRequestCounter,
108-
gitRequestHistogram: gitRequestHistogram,
109-
repoPendingRequestsGauge: repoPendingRequestsGauge,
110-
redisRequestCounter: redisRequestCounter,
111-
redisRequestHistogram: redisRequestHistogram,
112-
PrometheusRegistry: registry,
175+
handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
176+
gitFetchFailCounter: gitFetchFailCounter,
177+
gitLsRemoteFailCounter: gitLsRemoteFailCounter,
178+
gitRequestCounter: gitRequestCounter,
179+
gitRequestHistogram: gitRequestHistogram,
180+
repoPendingRequestsGauge: repoPendingRequestsGauge,
181+
redisRequestCounter: redisRequestCounter,
182+
redisRequestHistogram: redisRequestHistogram,
183+
ociRequestCounter: ociRequestCounter,
184+
ociRequestHistogram: ociRequestHistogram,
185+
ociExtractFailCounter: ociExtractFailCounter,
186+
ociResolveRevisionFailCounter: ociResolveRevisionFailCounter,
187+
ociGetTagsFailCounter: ociGetTagsFailCounter,
188+
ociDigestMetadataCounter: ociDigestMetadataCounter,
189+
ociTestRepoFailCounter: ociTestRepoFailCounter,
190+
PrometheusRegistry: registry,
113191
}
114192
}
115193

@@ -149,3 +227,37 @@ func (m *MetricsServer) IncRedisRequest(failed bool) {
149227
func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) {
150228
m.redisRequestHistogram.WithLabelValues("argocd-repo-server").Observe(duration.Seconds())
151229
}
230+
231+
// IncOCIRequest increments the OCI requests counter
232+
func (m *MetricsServer) IncOCIRequest(repo string, requestType OCIRequestType) {
233+
m.ociRequestCounter.WithLabelValues(repo, string(requestType)).Inc()
234+
}
235+
236+
func (m *MetricsServer) ObserveOCIRequestDuration(repo string, requestType OCIRequestType, duration time.Duration) {
237+
m.ociRequestHistogram.WithLabelValues(repo, string(requestType)).Observe(duration.Seconds())
238+
}
239+
240+
// IncOCIExtractFailCounter increments the OCI failed extract requests counter
241+
func (m *MetricsServer) IncOCIExtractFailCounter(repo string, revision string) {
242+
m.ociExtractFailCounter.WithLabelValues(repo, revision).Inc()
243+
}
244+
245+
// IncOCIResolveRevisionFailCounter increments the OCI failed resolve revision requests counter
246+
func (m *MetricsServer) IncOCIResolveRevisionFailCounter(repo string, revision string) {
247+
m.ociResolveRevisionFailCounter.WithLabelValues(repo, revision).Inc()
248+
}
249+
250+
// IncOCIDigestMetadataCounter increments the OCI failed digest metadata requests counter
251+
func (m *MetricsServer) IncOCIDigestMetadataCounter(repo string, revision string) {
252+
m.ociDigestMetadataCounter.WithLabelValues(repo, revision).Inc()
253+
}
254+
255+
// IncOCIGetTagsFailCounter increments the OCI failed get tags requests counter
256+
func (m *MetricsServer) IncOCIGetTagsFailCounter(repo string) {
257+
m.ociGetTagsFailCounter.WithLabelValues(repo).Inc()
258+
}
259+
260+
// IncOCITestRepoFailCounter increments the OCI failed test repo requests counter
261+
func (m *MetricsServer) IncOCITestRepoFailCounter(repo string) {
262+
m.ociTestRepoFailCounter.WithLabelValues(repo).Inc()
263+
}

reposerver/metrics/ocihandlers.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package metrics
2+
3+
import (
4+
"time"
5+
6+
"github.com/argoproj/argo-cd/v3/util/oci"
7+
)
8+
9+
type OCIRequestType string
10+
11+
const (
12+
OCIRequestTypeExtract = "extract"
13+
OCIRequestTypeResolveRevision = "resolve-revision"
14+
OCIRequestTypeDigestMetadata = "digest-metadata"
15+
OCIRequestTypeGetTags = "get-tags"
16+
OCIRequestTypeTestRepo = "test-repo"
17+
)
18+
19+
// NewOCIClientEventHandlers creates event handlers that update OCI repo-related metrics
20+
func NewOCIClientEventHandlers(metricsServer *MetricsServer) oci.EventHandlers {
21+
return oci.EventHandlers{
22+
OnExtract: func(repo string) func() {
23+
return processMetricFunc(metricsServer, repo, OCIRequestTypeExtract)
24+
},
25+
OnResolveRevision: func(repo string) func() {
26+
return processMetricFunc(metricsServer, repo, OCIRequestTypeResolveRevision)
27+
},
28+
OnDigestMetadata: func(repo string) func() {
29+
return processMetricFunc(metricsServer, repo, OCIRequestTypeDigestMetadata)
30+
},
31+
OnGetTags: func(repo string) func() {
32+
return processMetricFunc(metricsServer, repo, OCIRequestTypeGetTags)
33+
},
34+
OnTestRepo: func(repo string) func() {
35+
return processMetricFunc(metricsServer, repo, OCIRequestTypeTestRepo)
36+
},
37+
OnExtractFail: func(repo string) func(revision string) {
38+
return func(revision string) { metricsServer.IncOCIExtractFailCounter(repo, revision) }
39+
},
40+
OnResolveRevisionFail: func(repo string) func(revision string) {
41+
return func(revision string) { metricsServer.IncOCIResolveRevisionFailCounter(repo, revision) }
42+
},
43+
OnDigestMetadataFail: func(repo string) func(revision string) {
44+
return func(revision string) { metricsServer.IncOCIDigestMetadataCounter(repo, revision) }
45+
},
46+
OnGetTagsFail: func(repo string) func() {
47+
return func() { metricsServer.IncOCIGetTagsFailCounter(repo) }
48+
},
49+
OnTestRepoFail: func(repo string) func() {
50+
return func() { metricsServer.IncOCITestRepoFailCounter(repo) }
51+
},
52+
}
53+
}
54+
55+
func processMetricFunc(metricsServer *MetricsServer, repo string, requestType OCIRequestType) func() {
56+
startTime := time.Now()
57+
metricsServer.IncOCIRequest(repo, requestType)
58+
return func() {
59+
metricsServer.ObserveOCIRequestDuration(repo, requestType, time.Since(startTime))
60+
}
61+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package metrics
2+
3+
import (
4+
"testing"
5+
6+
"github.com/prometheus/client_golang/prometheus/testutil"
7+
"github.com/stretchr/testify/assert"
8+
)
9+
10+
func TestOCIClientEventHandlers(t *testing.T) {
11+
tests := []struct {
12+
name string
13+
setup func()
14+
teardown func()
15+
testFunc func(t *testing.T)
16+
}{
17+
{
18+
name: "test event handlers",
19+
testFunc: func(t *testing.T) {
20+
t.Helper()
21+
revision := "1.2.3"
22+
assert.NotPanics(t, func() {
23+
metricsServer := NewMetricsServer()
24+
eventHandlers := NewOCIClientEventHandlers(metricsServer)
25+
eventHandlers.OnExtract("test")()
26+
eventHandlers.OnTestRepo("test")()
27+
eventHandlers.OnGetTags("test")()
28+
eventHandlers.OnResolveRevision("test")()
29+
eventHandlers.OnDigestMetadata("test")()
30+
eventHandlers.OnExtractFail("test")(revision)
31+
eventHandlers.OnTestRepoFail("test")()
32+
eventHandlers.OnGetTagsFail("test")()
33+
eventHandlers.OnResolveRevisionFail("test")(revision)
34+
eventHandlers.OnDigestMetadataFail("test")(revision)
35+
c := metricsServer.ociRequestCounter
36+
assert.Equal(t, 5, testutil.CollectAndCount(c))
37+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeExtract)), 0.01)
38+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeResolveRevision)), 0.01)
39+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeDigestMetadata)), 0.01)
40+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeTestRepo)), 0.01)
41+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeTestRepo)), 0.01)
42+
43+
c = metricsServer.ociDigestMetadataCounter
44+
assert.Equal(t, 1, testutil.CollectAndCount(c))
45+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)
46+
47+
c = metricsServer.ociTestRepoFailCounter
48+
assert.Equal(t, 1, testutil.CollectAndCount(c))
49+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test")), 0.01)
50+
51+
c = metricsServer.ociExtractFailCounter
52+
assert.Equal(t, 1, testutil.CollectAndCount(c))
53+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)
54+
55+
c = metricsServer.ociGetTagsFailCounter
56+
assert.Equal(t, 1, testutil.CollectAndCount(c))
57+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test")), 0.01)
58+
59+
c = metricsServer.ociResolveRevisionFailCounter
60+
assert.Equal(t, 1, testutil.CollectAndCount(c))
61+
assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)
62+
})
63+
},
64+
},
65+
}
66+
for _, tt := range tests {
67+
t.Run(tt.name, func(t *testing.T) {
68+
if tt.setup != nil {
69+
tt.setup()
70+
}
71+
if tt.teardown != nil {
72+
defer tt.teardown()
73+
}
74+
tt.testFunc(t)
75+
})
76+
}
77+
}

reposerver/repository/repository.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ func (s *Service) Init() error {
190190

191191
// ListOCITags List a subset of the refs (currently, branches and tags) of a git repo
192192
func (s *Service) ListOCITags(ctx context.Context, q *apiclient.ListRefsRequest) (*apiclient.Refs, error) {
193-
ociClient, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
193+
ociClient, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
194194
if err != nil {
195195
return nil, fmt.Errorf("error creating oci client: %w", err)
196196
}
@@ -2493,13 +2493,14 @@ func (s *Service) GetRevisionMetadata(_ context.Context, q *apiclient.RepoServer
24932493
}
24942494

24952495
func (s *Service) GetOCIMetadata(ctx context.Context, q *apiclient.RepoServerRevisionChartDetailsRequest) (*v1alpha1.OCIMetadata, error) {
2496-
client, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
2496+
client, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
24972497
if err != nil {
24982498
return nil, fmt.Errorf("failed to initialize oci client: %w", err)
24992499
}
25002500

25012501
metadata, err := client.DigestMetadata(ctx, q.Revision)
25022502
if err != nil {
2503+
s.metricsServer.IncOCIDigestMetadataCounter(q.Repo.Repo, q.Revision)
25032504
return nil, fmt.Errorf("failed to extract digest metadata for revision %q: %w", q.Revision, err)
25042505
}
25052506

@@ -2589,7 +2590,7 @@ func (s *Service) newClientResolveRevision(repo *v1alpha1.Repository, revision s
25892590
}
25902591

25912592
func (s *Service) newOCIClientResolveRevision(ctx context.Context, repo *v1alpha1.Repository, revision string, noRevisionCache bool) (oci.Client, string, error) {
2592-
ociClient, err := s.newOCIClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
2593+
ociClient, err := s.newOCIClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
25932594
if err != nil {
25942595
return nil, "", fmt.Errorf("failed to initialize oci client: %w", err)
25952596
}
@@ -2786,7 +2787,8 @@ func (s *Service) TestRepository(ctx context.Context, q *apiclient.TestRepositor
27862787
return git.TestRepo(repo.Repo, repo.GetGitCreds(s.gitCredsStore), repo.IsInsecure(), repo.IsLFSEnabled(), repo.Proxy, repo.NoProxy)
27872788
},
27882789
"oci": func() error {
2789-
client, err := oci.NewClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes)
2790+
client, err := oci.NewClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy,
2791+
s.initConstants.OCIMediaTypes, oci.WithEventHandlers(metrics.NewOCIClientEventHandlers(s.metricsServer)))
27902792
if err != nil {
27912793
return err
27922794
}
@@ -3139,3 +3141,13 @@ func (s *Service) updateCachedRevision(logCtx *log.Entry, oldRev string, newRev
31393141
logCtx.Debugf("manifest cache updated for application %s in repo %s from revision %s to revision %s", request.AppName, request.GetRepo().Repo, oldRev, newRev)
31403142
return nil
31413143
}
3144+
3145+
func (s *Service) ociClientStandardOpts() []oci.ClientOpts {
3146+
return []oci.ClientOpts{
3147+
oci.WithIndexCache(s.cache),
3148+
oci.WithImagePaths(s.ociPaths),
3149+
oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize),
3150+
oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize),
3151+
oci.WithEventHandlers(metrics.NewOCIClientEventHandlers(s.metricsServer)),
3152+
}
3153+
}

0 commit comments

Comments
 (0)