Skip to content
23 changes: 15 additions & 8 deletions docs/operator-manual/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,21 @@ Metrics about the Repo Server. The gRPC metrics are not exposed by default. Met
Scraped at the `argocd-repo-server:8084/metrics` endpoint.


| Metric | Type | Description |
| --------------------------------------- | :-------: | ------------------------------------------------------------------------- |
| `argocd_git_request_duration_seconds` | histogram | Git requests duration seconds. |
| `argocd_git_request_total` | counter | Number of git requests performed by repo server |
| `argocd_git_fetch_fail_total` | counter | Number of git fetch requests failures by repo server |
| `argocd_redis_request_duration_seconds` | histogram | Redis requests duration seconds. |
| `argocd_redis_request_total` | counter | Number of Kubernetes requests executed during application reconciliation. |
| `argocd_repo_pending_request_total` | gauge | Number of pending requests requiring repository lock |
| Metric | Type | Description |
|------------------------------------------|:----------:|---------------------------------------------------------------------------|
| `argocd_git_request_duration_seconds` | histogram | Git requests duration seconds. |
| `argocd_git_request_total` | counter | Number of git requests performed by repo server |
| `argocd_git_fetch_fail_total` | counter | Number of git fetch requests failures by repo server |
| `argocd_redis_request_duration_seconds` | histogram | Redis requests duration seconds. |
| `argocd_redis_request_total` | counter | Number of Redis requests executed during application reconciliation. |
| `argocd_repo_pending_request_total` | gauge | Number of pending requests requiring repository lock |
| `argocd_oci_request_total` | counter | Number of OCI requests performed by repo server |
| `argocd_oci_request_duration_seconds` | histogram | OCI requests duration seconds. |
| `argocd_oci_test_repo_fail_total` | counter | Number of OCI test repo requests failures by repo server |
| `argocd_oci_get_tags_fail_total` | counter | Number of OCI get tags requests failures by repo server |
| `argocd_oci_digest_metadata_fail_total` | counter | Number of OCI digest metadata failures by repo server |
| `argocd_oci_resolve_revision_fail_total` | counter | Number of OCI resolve revision failures by repo server |
| `argocd_oci_extract_fail_total` | counter | Number of OCI extract requests failures by repo server |

## Commit Server Metrics

Expand Down
148 changes: 130 additions & 18 deletions reposerver/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,22 @@ import (
)

// MetricsServer groups the HTTP handler, the Prometheus registry and the
// metric vectors (git, repository-lock, Redis and OCI) kept by the repo server.
type MetricsServer struct {
handler http.Handler
gitFetchFailCounter *prometheus.CounterVec
gitLsRemoteFailCounter *prometheus.CounterVec
gitRequestCounter *prometheus.CounterVec
gitRequestHistogram *prometheus.HistogramVec
repoPendingRequestsGauge *prometheus.GaugeVec
redisRequestCounter *prometheus.CounterVec
redisRequestHistogram *prometheus.HistogramVec
PrometheusRegistry *prometheus.Registry
handler http.Handler
gitFetchFailCounter *prometheus.CounterVec
gitLsRemoteFailCounter *prometheus.CounterVec
gitRequestCounter *prometheus.CounterVec
gitRequestHistogram *prometheus.HistogramVec
repoPendingRequestsGauge *prometheus.GaugeVec
redisRequestCounter *prometheus.CounterVec
redisRequestHistogram *prometheus.HistogramVec
// OCI metrics: one failure counter per OCI operation, plus a request
// counter and a request duration histogram.
ociExtractFailCounter *prometheus.CounterVec
ociResolveRevisionFailCounter *prometheus.CounterVec
ociDigestMetadataCounter *prometheus.CounterVec
ociGetTagsFailCounter *prometheus.CounterVec
ociTestRepoFailCounter *prometheus.CounterVec
ociRequestCounter *prometheus.CounterVec
ociRequestHistogram *prometheus.HistogramVec
PrometheusRegistry *prometheus.Registry
}

// GitRequestType is a string type naming a kind of git request.
// NOTE(review): its values are declared outside this view; presumably they
// are used as the request type label of the git metrics — confirm.
type GitRequestType string
Expand Down Expand Up @@ -100,16 +107,87 @@ func NewMetricsServer() *MetricsServer {
)
registry.MustRegister(redisRequestHistogram)

ociExtractFailCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_extract_fail_total",
Help: "Number of OCI extract requests failures by repo server",
},
[]string{"repo", "revision"},
)
registry.MustRegister(ociExtractFailCounter)

ociResolveRevisionFailCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_resolve_revision_fail_total",
Help: "Number of OCI resolve revision requests failures by repo server",
},
[]string{"repo", "revision"},
)
registry.MustRegister(ociResolveRevisionFailCounter)

ociDigestMetadataCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_digest_metadata_fail_total",
Help: "Number of OCI digest metadata requests failures by repo server",
},
[]string{"repo", "revision"},
)
registry.MustRegister(ociDigestMetadataCounter)

ociGetTagsFailCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_get_tags_fail_total",
Help: "Number of OCI get tags failures by repo server",
},
[]string{"repo"},
)
registry.MustRegister(ociGetTagsFailCounter)

ociTestRepoFailCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_test_repo_fail_total",
Help: "Number of OCI test repo requests failures by repo server",
},
[]string{"repo"},
)
registry.MustRegister(ociTestRepoFailCounter)

ociRequestCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_oci_request_total",
Help: "Number of OCI requests performed by repo server",
},
[]string{"repo", "request_type"},
)
registry.MustRegister(ociRequestCounter)

ociRequestHistogram := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "argocd_oci_request_duration_seconds",
Help: "OCI requests duration seconds.",
Buckets: []float64{0.1, 0.25, .5, 1, 2, 4, 10, 20},
},
[]string{"repo", "request_type"},
)
registry.MustRegister(ociRequestHistogram)

return &MetricsServer{
handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
gitFetchFailCounter: gitFetchFailCounter,
gitLsRemoteFailCounter: gitLsRemoteFailCounter,
gitRequestCounter: gitRequestCounter,
gitRequestHistogram: gitRequestHistogram,
repoPendingRequestsGauge: repoPendingRequestsGauge,
redisRequestCounter: redisRequestCounter,
redisRequestHistogram: redisRequestHistogram,
PrometheusRegistry: registry,
handler: promhttp.HandlerFor(registry, promhttp.HandlerOpts{}),
gitFetchFailCounter: gitFetchFailCounter,
gitLsRemoteFailCounter: gitLsRemoteFailCounter,
gitRequestCounter: gitRequestCounter,
gitRequestHistogram: gitRequestHistogram,
repoPendingRequestsGauge: repoPendingRequestsGauge,
redisRequestCounter: redisRequestCounter,
redisRequestHistogram: redisRequestHistogram,
ociRequestCounter: ociRequestCounter,
ociRequestHistogram: ociRequestHistogram,
ociExtractFailCounter: ociExtractFailCounter,
ociResolveRevisionFailCounter: ociResolveRevisionFailCounter,
ociGetTagsFailCounter: ociGetTagsFailCounter,
ociDigestMetadataCounter: ociDigestMetadataCounter,
ociTestRepoFailCounter: ociTestRepoFailCounter,
PrometheusRegistry: registry,
}
}

Expand Down Expand Up @@ -149,3 +227,37 @@ func (m *MetricsServer) IncRedisRequest(failed bool) {
// ObserveRedisRequestDuration adds the given duration, expressed in seconds,
// to the Redis request histogram under the "argocd-repo-server" label value.
func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) {
	observer := m.redisRequestHistogram.WithLabelValues("argocd-repo-server")
	observer.Observe(duration.Seconds())
}

// IncOCIRequest adds one to the OCI request counter for the given repo and
// request type.
func (m *MetricsServer) IncOCIRequest(repo string, requestType OCIRequestType) {
	counter := m.ociRequestCounter.WithLabelValues(repo, string(requestType))
	counter.Inc()
}

// ObserveOCIRequestDuration adds the given duration, expressed in seconds, to
// the OCI request histogram for the given repo and request type.
func (m *MetricsServer) ObserveOCIRequestDuration(repo string, requestType OCIRequestType, duration time.Duration) {
	observer := m.ociRequestHistogram.WithLabelValues(repo, string(requestType))
	observer.Observe(duration.Seconds())
}

// IncOCIExtractFailCounter adds one to the counter of failed OCI extract
// requests for the given repo and revision.
func (m *MetricsServer) IncOCIExtractFailCounter(repo string, revision string) {
	failures := m.ociExtractFailCounter.WithLabelValues(repo, revision)
	failures.Inc()
}

// IncOCIResolveRevisionFailCounter adds one to the counter of failed OCI
// resolve revision requests for the given repo and revision.
func (m *MetricsServer) IncOCIResolveRevisionFailCounter(repo string, revision string) {
	failures := m.ociResolveRevisionFailCounter.WithLabelValues(repo, revision)
	failures.Inc()
}

// IncOCIDigestMetadataCounter adds one to the counter of failed OCI digest
// metadata requests for the given repo and revision.
func (m *MetricsServer) IncOCIDigestMetadataCounter(repo string, revision string) {
	failures := m.ociDigestMetadataCounter.WithLabelValues(repo, revision)
	failures.Inc()
}

// IncOCIGetTagsFailCounter adds one to the counter of failed OCI get tags
// requests for the given repo.
func (m *MetricsServer) IncOCIGetTagsFailCounter(repo string) {
	failures := m.ociGetTagsFailCounter.WithLabelValues(repo)
	failures.Inc()
}

// IncOCITestRepoFailCounter adds one to the counter of failed OCI test repo
// requests for the given repo.
func (m *MetricsServer) IncOCITestRepoFailCounter(repo string) {
	failures := m.ociTestRepoFailCounter.WithLabelValues(repo)
	failures.Inc()
}
61 changes: 61 additions & 0 deletions reposerver/metrics/ocihandlers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package metrics

import (
"time"

"github.com/argoproj/argo-cd/v3/util/oci"
)

// OCIRequestType names the kind of OCI operation a request metric refers to.
type OCIRequestType string

// Request type names used when recording OCI request metrics.
//
// These are untyped string constants, so they can be used both where a plain
// string and where an OCIRequestType is expected.
const (
	// OCIRequestTypeDigestMetadata identifies digest metadata requests.
	OCIRequestTypeDigestMetadata = "digest-metadata"
	// OCIRequestTypeExtract identifies extract requests.
	OCIRequestTypeExtract = "extract"
	// OCIRequestTypeGetTags identifies get tags requests.
	OCIRequestTypeGetTags = "get-tags"
	// OCIRequestTypeResolveRevision identifies resolve revision requests.
	OCIRequestTypeResolveRevision = "resolve-revision"
	// OCIRequestTypeTestRepo identifies test repo requests.
	OCIRequestTypeTestRepo = "test-repo"
)

// NewOCIClientEventHandlers builds the oci.EventHandlers that feed OCI client
// events into the metrics held by metricsServer.
//
// Each On<Operation> handler counts the request when it is invoked and returns
// a function that records the request duration when called. Each
// On<Operation>Fail handler returns a function that increments the matching
// failure counter.
func NewOCIClientEventHandlers(metricsServer *MetricsServer) oci.EventHandlers {
	// timed builds a handler that counts a request of the given type and
	// returns the function recording its duration.
	timed := func(requestType OCIRequestType) func(repo string) func() {
		return func(repo string) func() {
			return processMetricFunc(metricsServer, repo, requestType)
		}
	}

	// failedRevision adapts a (repo, revision) failure counter to a handler
	// that takes the repo first and the revision later.
	failedRevision := func(inc func(repo string, revision string)) func(repo string) func(revision string) {
		return func(repo string) func(revision string) {
			return func(revision string) { inc(repo, revision) }
		}
	}

	// failed adapts a repo-only failure counter to a handler.
	failed := func(inc func(repo string)) func(repo string) func() {
		return func(repo string) func() {
			return func() { inc(repo) }
		}
	}

	return oci.EventHandlers{
		OnExtract:         timed(OCIRequestTypeExtract),
		OnResolveRevision: timed(OCIRequestTypeResolveRevision),
		OnDigestMetadata:  timed(OCIRequestTypeDigestMetadata),
		OnGetTags:         timed(OCIRequestTypeGetTags),
		OnTestRepo:        timed(OCIRequestTypeTestRepo),

		OnExtractFail:         failedRevision(metricsServer.IncOCIExtractFailCounter),
		OnResolveRevisionFail: failedRevision(metricsServer.IncOCIResolveRevisionFailCounter),
		OnDigestMetadataFail:  failedRevision(metricsServer.IncOCIDigestMetadataCounter),
		OnGetTagsFail:         failed(metricsServer.IncOCIGetTagsFailCounter),
		OnTestRepoFail:        failed(metricsServer.IncOCITestRepoFailCounter),
	}
}

// processMetricFunc counts one OCI request of the given type for repo and
// returns a function that, when called, records the time elapsed since
// processMetricFunc was invoked as the request duration.
func processMetricFunc(metricsServer *MetricsServer, repo string, requestType OCIRequestType) func() {
	begin := time.Now()
	metricsServer.IncOCIRequest(repo, requestType)

	observe := func() {
		elapsed := time.Since(begin)
		metricsServer.ObserveOCIRequestDuration(repo, requestType, elapsed)
	}
	return observe
}
77 changes: 77 additions & 0 deletions reposerver/metrics/ocihandlers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package metrics

import (
"testing"

"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
)

// TestOCIClientEventHandlers invokes every handler returned by
// NewOCIClientEventHandlers once and checks that the request counter and each
// failure counter on the MetricsServer recorded exactly one sample.
func TestOCIClientEventHandlers(t *testing.T) {
	tests := []struct {
		name     string
		setup    func()
		teardown func()
		testFunc func(t *testing.T)
	}{
		{
			name: "test event handlers",
			testFunc: func(t *testing.T) {
				t.Helper()
				revision := "1.2.3"
				assert.NotPanics(t, func() {
					metricsServer := NewMetricsServer()
					eventHandlers := NewOCIClientEventHandlers(metricsServer)
					eventHandlers.OnExtract("test")()
					eventHandlers.OnTestRepo("test")()
					eventHandlers.OnGetTags("test")()
					eventHandlers.OnResolveRevision("test")()
					eventHandlers.OnDigestMetadata("test")()
					eventHandlers.OnExtractFail("test")(revision)
					eventHandlers.OnTestRepoFail("test")()
					eventHandlers.OnGetTagsFail("test")()
					eventHandlers.OnResolveRevisionFail("test")(revision)
					eventHandlers.OnDigestMetadataFail("test")(revision)

					// One series per request type, each incremented once.
					c := metricsServer.ociRequestCounter
					assert.Equal(t, 5, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeExtract)), 0.01)
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeResolveRevision)), 0.01)
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeDigestMetadata)), 0.01)
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeTestRepo)), 0.01)
					// Check get-tags as well; asserting test-repo twice would leave it unverified.
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", OCIRequestTypeGetTags)), 0.01)

					c = metricsServer.ociDigestMetadataCounter
					assert.Equal(t, 1, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)

					c = metricsServer.ociTestRepoFailCounter
					assert.Equal(t, 1, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test")), 0.01)

					c = metricsServer.ociExtractFailCounter
					assert.Equal(t, 1, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)

					c = metricsServer.ociGetTagsFailCounter
					assert.Equal(t, 1, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test")), 0.01)

					c = metricsServer.ociResolveRevisionFailCounter
					assert.Equal(t, 1, testutil.CollectAndCount(c))
					assert.InDelta(t, float64(1), testutil.ToFloat64(c.WithLabelValues("test", revision)), 0.01)
				})
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if tt.setup != nil {
				tt.setup()
			}
			if tt.teardown != nil {
				defer tt.teardown()
			}
			tt.testFunc(t)
		})
	}
}
20 changes: 16 additions & 4 deletions reposerver/repository/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func (s *Service) Init() error {

// ListOCITags lists the tags available in an OCI repository
func (s *Service) ListOCITags(ctx context.Context, q *apiclient.ListRefsRequest) (*apiclient.Refs, error) {
ociClient, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
ociClient, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
if err != nil {
return nil, fmt.Errorf("error creating oci client: %w", err)
}
Expand Down Expand Up @@ -2493,13 +2493,14 @@ func (s *Service) GetRevisionMetadata(_ context.Context, q *apiclient.RepoServer
}

func (s *Service) GetOCIMetadata(ctx context.Context, q *apiclient.RepoServerRevisionChartDetailsRequest) (*v1alpha1.OCIMetadata, error) {
client, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
client, err := s.newOCIClient(q.Repo.Repo, q.Repo.GetOCICreds(), q.Repo.Proxy, q.Repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
if err != nil {
return nil, fmt.Errorf("failed to initialize oci client: %w", err)
}

metadata, err := client.DigestMetadata(ctx, q.Revision)
if err != nil {
s.metricsServer.IncOCIDigestMetadataCounter(q.Repo.Repo, q.Revision)
return nil, fmt.Errorf("failed to extract digest metadata for revision %q: %w", q.Revision, err)
}

Expand Down Expand Up @@ -2589,7 +2590,7 @@ func (s *Service) newClientResolveRevision(repo *v1alpha1.Repository, revision s
}

func (s *Service) newOCIClientResolveRevision(ctx context.Context, repo *v1alpha1.Repository, revision string, noRevisionCache bool) (oci.Client, string, error) {
ociClient, err := s.newOCIClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes, oci.WithIndexCache(s.cache), oci.WithImagePaths(s.ociPaths), oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize), oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
ociClient, err := s.newOCIClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes, s.ociClientStandardOpts()...)
if err != nil {
return nil, "", fmt.Errorf("failed to initialize oci client: %w", err)
}
Expand Down Expand Up @@ -2786,7 +2787,8 @@ func (s *Service) TestRepository(ctx context.Context, q *apiclient.TestRepositor
return git.TestRepo(repo.Repo, repo.GetGitCreds(s.gitCredsStore), repo.IsInsecure(), repo.IsLFSEnabled(), repo.Proxy, repo.NoProxy)
},
"oci": func() error {
client, err := oci.NewClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy, s.initConstants.OCIMediaTypes)
client, err := oci.NewClient(repo.Repo, repo.GetOCICreds(), repo.Proxy, repo.NoProxy,
s.initConstants.OCIMediaTypes, oci.WithEventHandlers(metrics.NewOCIClientEventHandlers(s.metricsServer)))
if err != nil {
return err
}
Expand Down Expand Up @@ -3139,3 +3141,13 @@ func (s *Service) updateCachedRevision(logCtx *log.Entry, oldRev string, newRev
logCtx.Debugf("manifest cache updated for application %s in repo %s from revision %s to revision %s", request.AppName, request.GetRepo().Repo, oldRev, newRev)
return nil
}

// ociClientStandardOpts returns the client options shared by the OCI clients
// this service creates: the index cache, the image paths, the manifest
// max-extracted-size settings, and the event handlers that report to the
// service's metrics server.
func (s *Service) ociClientStandardOpts() []oci.ClientOpts {
	opts := make([]oci.ClientOpts, 0, 5)
	opts = append(opts, oci.WithIndexCache(s.cache))
	opts = append(opts, oci.WithImagePaths(s.ociPaths))
	opts = append(opts, oci.WithManifestMaxExtractedSize(s.initConstants.OCIManifestMaxExtractedSize))
	opts = append(opts, oci.WithDisableManifestMaxExtractedSize(s.initConstants.DisableOCIManifestMaxExtractedSize))
	opts = append(opts, oci.WithEventHandlers(metrics.NewOCIClientEventHandlers(s.metricsServer)))
	return opts
}
Loading
Loading