diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java index 90ff9a166e0..959244bd5d2 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientContext.java @@ -1193,23 +1193,19 @@ protected synchronized ThriftTransportPool getTransportPoolImpl(boolean shouldHa return thriftTransportPool; } - public MeterRegistry getMeterRegistry() { - ensureOpen(); - return micrometer; - } - - public void setMeterRegistry(MeterRegistry micrometer) { + public synchronized void setMeterRegistry(MeterRegistry micrometer) { ensureOpen(); this.micrometer = micrometer; - getCaches(); + if (caches != null) { + caches.registerMetrics(micrometer); + } } public synchronized Caches getCaches() { ensureOpen(); if (caches == null) { caches = Caches.getInstance(); - if (micrometer != null - && getConfiguration().getBoolean(Property.GENERAL_MICROMETER_CACHE_METRICS_ENABLED)) { + if (micrometer != null) { caches.registerMetrics(micrometer); } } diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index cdf70a19a3e..7efc0f5d4f8 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -352,9 +352,6 @@ temporary files (for example, when creating a pre-split table). \ PropertyType.TIMEDURATION, "The maximum amount of time that a Scanner should wait before retrying a failed RPC.", "1.7.3"), - GENERAL_MICROMETER_CACHE_METRICS_ENABLED("general.micrometer.cache.metrics.enabled", "false", - PropertyType.BOOLEAN, "Enables Caffeine Cache metrics functionality using Micrometer.", - "4.0.0"), GENERAL_MICROMETER_ENABLED("general.micrometer.enabled", "false", PropertyType.BOOLEAN, "Enables metrics collection and reporting functionality using Micrometer.", "2.1.0"), GENERAL_MICROMETER_JVM_METRICS_ENABLED("general.micrometer.jvm.metrics.enabled", "false", @@ -1688,8 +1685,7 @@ public static boolean isValidTablePropertyKey(String key) { GENERAL_IDLE_PROCESS_INTERVAL, GENERAL_MICROMETER_ENABLED, GENERAL_MICROMETER_JVM_METRICS_ENABLED, GENERAL_MICROMETER_LOG_METRICS, GENERAL_MICROMETER_FACTORY, GENERAL_SERVER_LOCK_VERIFICATION_INTERVAL, - GENERAL_CACHE_MANAGER_IMPL, GENERAL_MICROMETER_CACHE_METRICS_ENABLED, - GENERAL_LOW_MEM_DETECTOR_INTERVAL, + GENERAL_CACHE_MANAGER_IMPL, GENERAL_LOW_MEM_DETECTOR_INTERVAL, // MANAGER options MANAGER_THREADCHECK, MANAGER_FATE_METRICS_MIN_UPDATE_INTERVAL, MANAGER_METADATA_SUSPENDABLE, diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java index ce55be21d90..58204cd01a5 100644 --- a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java +++ b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java @@ -83,10 +83,10 @@ public enum Metric { MetricDocSection.COMPACTION, "Majc In Progress", null, NUMBER), COMPACTOR_MAJC_STUCK("accumulo.compaction.majc.stuck", MetricType.LONG_TASK_TIMER, "Number and duration of stuck major compactions.", MetricDocSection.COMPACTION, "Majc Stuck", - null, NUMBER), + null, DURATION), COMPACTOR_MINC_STUCK("accumulo.compaction.minc.stuck", MetricType.LONG_TASK_TIMER, "Number and duration of stuck minor compactions.", MetricDocSection.COMPACTION, "Minc Stuck", - null, NUMBER), + null, DURATION), COMPACTOR_ENTRIES_READ("accumulo.compaction.entries.read", MetricType.FUNCTION_COUNTER, "Number of entries read by all compactions that have run on this compactor (majc) or tserver (minc).", MetricDocSection.COMPACTION, "Compaction Entries Read", null, NUMBER), @@ -122,7 +122,7 @@ public enum Metric { MetricDocSection.COMPACTION, "Compaction Queue Avg Job Age", null, NUMBER), COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_POLL_TIMER("accumulo.compaction.queue.jobs.exit.time", MetricType.TIMER, "Tracks time a job spent in the queue before exiting the queue.", - MetricDocSection.COMPACTION, "Compaction Queue Job Time Queued", null, NUMBER), + MetricDocSection.COMPACTION, "Compaction Queue Job Time Queued", null, DURATION), // Fate Metrics FATE_TYPE_IN_PROGRESS("accumulo.fate.ops.in.progress.by.type", MetricType.GAUGE, @@ -241,24 +241,27 @@ public enum Metric { // Scan Server Metrics SCAN_RESERVATION_TOTAL_TIMER("accumulo.scan.reservation.total.timer", MetricType.TIMER, - "Time to reserve a tablet's files for scan.", MetricDocSection.SCAN_SERVER, - "Scan Reservation Total Time", null, NUMBER), + "Average time to reserve a tablet's files for scan.", MetricDocSection.SCAN_SERVER, + "Mean Reservation", null, DURATION), SCAN_RESERVATION_WRITEOUT_TIMER("accumulo.scan.reservation.writeout.timer", MetricType.TIMER, "Time to write out a tablets file reservations for scan.", MetricDocSection.SCAN_SERVER, - "Scan Reservation Write Time", null, NUMBER), + "Scan Reservation Write Time", null, DURATION), + SCAN_RESERVATION_FILES("accumulo.scan.reservation.files", MetricType.GAUGE, + "The number of files reserved by a scan server.", MetricDocSection.SCAN_SERVER, + "Files Reserved", null, NUMBER), SCAN_RESERVATION_CONFLICT_COUNTER("accumulo.scan.reservation.conflict.count", MetricType.COUNTER, "Count of instances where file reservation attempts for scans encountered conflicts.", MetricDocSection.SCAN_SERVER, "Scan Reservation Conflicts", null, NUMBER), SCAN_TABLET_METADATA_CACHE("accumulo.scan.tablet.metadata.cache", MetricType.CACHE, "Scan server tablet cache metrics.", MetricDocSection.SCAN_SERVER, "Scan Server Tablet Cache", null, NUMBER), - - // Scan Metrics SCAN_BUSY_TIMEOUT_COUNT("accumulo.scan.busy.timeout.count", MetricType.FUNCTION_COUNTER, "Count of the scans where a busy timeout happened.", MetricDocSection.SCAN, "Scan Busy Count", null, NUMBER), + + // Scan Metrics SCAN_TIMES("accumulo.scan.times", MetricType.TIMER, "Scan session lifetime (creation to close).", - MetricDocSection.SCAN, "Scan Session Total Time", null, NUMBER), + MetricDocSection.SCAN, "Scan Session Total Time", null, DURATION), SCAN_OPEN_FILES("accumulo.scan.files.open", MetricType.GAUGE, "Number of files open for scans.", MetricDocSection.SCAN, "Scan Files Open", null, NUMBER), SCAN_RESULTS("accumulo.scan.result", MetricType.DISTRIBUTION_SUMMARY, "Results per scan.", @@ -307,10 +310,11 @@ public enum Metric { // Minor Compaction Metrics MINC_QUEUED("accumulo.compaction.minc.queued", MetricType.TIMER, "Queued minor compactions time queued.", MetricDocSection.COMPACTION, "Minc Queued", null, - NUMBER), + DURATION), MINC_RUNNING("accumulo.compaction.minc.running", MetricType.TIMER, - "Minor compactions time active.", MetricDocSection.COMPACTION, "Minc Running", null, NUMBER), - MINC_PAUSED("accumulo.compaction.minc.paused", MetricType.COUNTER, + "Minor compactions time active.", MetricDocSection.COMPACTION, "Minc Running", null, + DURATION), + MINC_PAUSED("accumulo.compaction.minc.paused", MetricType.FUNCTION_COUNTER, "Number of paused minor compactions.", MetricDocSection.COMPACTION, "Minc Paused", null, NUMBER), @@ -320,19 +324,19 @@ public enum Metric { MetricDocSection.TABLET_SERVER, "Ingest Errors", null, NUMBER), UPDATE_LOCK("accumulo.updates.lock", MetricType.TIMER, "Average time taken for conditional mutation to get a row lock.", - MetricDocSection.TABLET_SERVER, "Conditional Mutation Row Lock Wait Time", null, NUMBER), + MetricDocSection.TABLET_SERVER, "Conditional Mutation Row Lock Wait Time", null, DURATION), UPDATE_CHECK("accumulo.updates.check", MetricType.TIMER, "Average time taken for conditional mutation to check conditions.", - MetricDocSection.TABLET_SERVER, "Conditional Mutation Condition Check Time", null, NUMBER), + MetricDocSection.TABLET_SERVER, "Conditional Mutation Condition Check Time", null, DURATION), UPDATE_COMMIT("accumulo.updates.commit", MetricType.TIMER, "Average time taken to commit a mutation.", MetricDocSection.TABLET_SERVER, - "Mutation Commit Avg Total Time", null, NUMBER), + "Mutation Commit Avg Total Time", null, DURATION), UPDATE_COMMIT_PREP("accumulo.updates.commit.prep", MetricType.TIMER, "Average time taken to prepare to commit a single mutation.", MetricDocSection.TABLET_SERVER, - "Mutation Commit Avg Prep Time", null, NUMBER), + "Mutation Commit Avg Prep Time", null, DURATION), UPDATE_WALOG_WRITE("accumulo.updates.walog.write", MetricType.TIMER, "Time taken to write a batch of mutations to WAL.", MetricDocSection.TABLET_SERVER, - "WAL Write Time", null, NUMBER), + "WAL Write Time", null, DURATION), UPDATE_MUTATION_ARRAY_SIZE("accumulo.updates.mutation.arrays.size", MetricType.DISTRIBUTION_SUMMARY, "Batch size of mutations from client.", MetricDocSection.TABLET_SERVER, "Mutation Batch Size", null, NUMBER), @@ -399,7 +403,11 @@ public enum Metric { MetricDocSection.GENERAL_SERVER, "Completed task", null, NUMBER), EXECUTOR_QUEUED("executor.queued", MetricType.GAUGE, "Task queued for a thread pool. Each thread pool emits this metric w/ a different tag.", - MetricDocSection.GENERAL_SERVER, "Queued task", null, NUMBER); + MetricDocSection.GENERAL_SERVER, "Queued task", null, NUMBER), + + // Cache metrics + CACHE_SIZE("cache.size", MetricType.GAUGE, "The current number of entries a cache has.", + MetricDocSection.GENERAL_SERVER, "Cache size", null, NUMBER); public enum MonitorCssClass { BYTES("big-size"), diff --git a/server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java b/server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java index a1a4bacfe04..4656c76d172 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java +++ b/server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java @@ -410,7 +410,8 @@ public MetricResponse getMetrics(TInfo tinfo, TCredentials credentials) throws T registry.getMeters().forEach(m -> { if (m.getId().getName().startsWith("accumulo.") || m.getId().getName().equals(Metric.EXECUTOR_COMPLETED.getName()) - || m.getId().getName().equals(Metric.EXECUTOR_QUEUED.getName())) { + || m.getId().getName().equals(Metric.EXECUTOR_QUEUED.getName()) + || m.getId().getName().equals(Metric.CACHE_SIZE.getName())) { if (!this.monitorMetricExclusions.contains(m.getId().getName())) { m.match(response::writeMeter, response::writeMeter, response::writeTimer, response::writeDistributionSummary, response::writeLongTaskTimer, diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/CacheSizeColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/CacheSizeColumnFactory.java new file mode 100644 index 00000000000..05a2194d8a9 --- /dev/null +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/CacheSizeColumnFactory.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.monitor.next.views; + +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.client.admin.servers.ServerId; +import org.apache.accumulo.core.metrics.Metric; +import org.apache.accumulo.core.metrics.flatbuffers.FMetric; +import org.apache.accumulo.core.metrics.flatbuffers.FTag; +import org.apache.accumulo.core.process.thrift.MetricResponse; +import org.apache.accumulo.monitor.next.SystemInformation; + +import com.google.common.base.Preconditions; + +public class CacheSizeColumnFactory implements ColumnFactory { + + private final String cacheName; + private final TableData.Column column; + + CacheSizeColumnFactory(String cacheName, String colHeader, String description) { + this.cacheName = cacheName; + Preconditions.checkState(Metric.CACHE_SIZE.getColumnClasses().length == 1); + this.column = new TableData.Column(Metric.CACHE_SIZE.getName() + "_" + cacheName, colHeader, + description, Metric.CACHE_SIZE.getColumnClasses()[0].getCssClass()); + } + + @Override + public TableData.Column getColumn() { + return column; + } + + @Override + public Object getRowData(ServerId sid, MetricResponse mr, + Map> serverMetrics) { + var tag = new FTag(); + for (var metric : serverMetrics.getOrDefault(Metric.CACHE_SIZE.getName(), List.of())) { + for (int i = 0; i < metric.tagsLength(); i++) { + metric.tags(tag, i); + if ("cache".equals(tag.key()) && cacheName.equals(tag.value())) { + return SystemInformation.getMetricValue(metric); + } + } + } + return null; + } +} diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ColumnFactory.java index 3b13a692568..6b07c10bdc2 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ColumnFactory.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ColumnFactory.java @@ -20,10 +20,12 @@ import java.util.List; import java.util.Map; +import java.util.function.Predicate; import org.apache.accumulo.core.client.admin.servers.ServerId; import org.apache.accumulo.core.metrics.MonitorMeterRegistry; import org.apache.accumulo.core.metrics.flatbuffers.FMetric; +import org.apache.accumulo.core.metrics.flatbuffers.FTag; import org.apache.accumulo.core.process.thrift.MetricResponse; import org.apache.accumulo.monitor.next.SystemInformation; import org.apache.accumulo.monitor.next.views.TableData.Column; @@ -56,10 +58,14 @@ default Number add(Number n1, Number n2) { } } - default Number sum(List metrics) { + default Number sum(List metrics, Predicate statPredicate) { Number sum = null; + FTag tag = new FTag(); for (var metric : metrics) { - sum = add(sum, SystemInformation.getMetricValue(metric)); + var statValue = TableDataFactory.extractStatistic(metric, tag); + if (statPredicate.test(statValue)) { + sum = add(sum, SystemInformation.getMetricValue(metric)); + } } return sum; } diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ExecutorColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ExecutorColumnFactory.java index 2e611e00668..0d251e6a54f 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ExecutorColumnFactory.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/ExecutorColumnFactory.java @@ -29,6 +29,7 @@ import org.apache.accumulo.core.process.thrift.MetricResponse; import org.apache.accumulo.monitor.next.SystemInformation; import org.apache.accumulo.monitor.next.views.TableData.Column; +import org.apache.accumulo.monitor.next.views.TableDataFactory.StatType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,6 +90,7 @@ public Object getRowData(ServerId sid, MetricResponse mr, Number sum = null; FTag ftag = new FTag(); + FTag statTag = new FTag(); for (var metric : metrics) { boolean foundTag = false; String tag = null; @@ -103,9 +105,9 @@ public Object getRowData(ServerId sid, MetricResponse mr, } } - var metricStatistic = TableDataFactory.extractStatistic(metric); - if (foundTag && (metricStatistic == null || metricStatistic.equals("value") - || metricStatistic.equals("count"))) { + var metricStatistic = TableDataFactory.extractStatistic(metric, statTag); + if (foundTag && (metricStatistic == null || metricStatistic.equals(StatType.VALUE) + || metricStatistic.equals(StatType.COUNT))) { var val = SystemInformation.getMetricValue(metric); log.trace("adding {}+{} for {} {} {} {}", sum, val, metric.name(), tag, sid.toHostPortString(), sid.getResourceGroup()); diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnFactory.java index 10c0372f74e..4ff01bd0577 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnFactory.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MetricColumnFactory.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.stream.Collectors; import org.apache.accumulo.core.client.admin.servers.ServerId; @@ -28,11 +29,13 @@ import org.apache.accumulo.core.metrics.flatbuffers.FMetric; import org.apache.accumulo.core.process.thrift.MetricResponse; import org.apache.accumulo.monitor.next.views.TableData.Column; +import org.apache.accumulo.monitor.next.views.TableDataFactory.StatType; public class MetricColumnFactory implements ColumnFactory { private final Column column; private final boolean computeRate; + private final Predicate statPredicate; MetricColumnFactory(Metric metric) { String classes; @@ -50,6 +53,14 @@ public class MetricColumnFactory implements ColumnFactory { } this.column = new Column(metric.getName(), metric.getColumnHeader(), metric.getColumnDescription(), classes); + + statPredicate = switch (metric.getType()) { + case GAUGE -> sv -> sv.equals(StatType.VALUE); + case COUNTER, FUNCTION_COUNTER -> sv -> sv.equals(StatType.COUNT); + case TIMER, DISTRIBUTION_SUMMARY -> sv -> sv.equals(StatType.AVERAGE); + case LONG_TASK_TIMER -> sv -> sv.equals(StatType.MAX); + case CACHE -> StatType.COUNT_OR_VALUE; // TODO this class does not really support this type + }; } @Override @@ -60,7 +71,7 @@ public Column getColumn() { @Override public Object getRowData(ServerId sid, MetricResponse mr, Map> serverMetrics) { - var sum = sum(serverMetrics.getOrDefault(column.key(), List.of())); + var sum = sum(serverMetrics.getOrDefault(column.key(), List.of()), statPredicate); if (computeRate) { return computeRate(sum); } else { diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MultiSumColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MultiSumColumnFactory.java new file mode 100644 index 00000000000..02bd3c73fe7 --- /dev/null +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/MultiSumColumnFactory.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.monitor.next.views; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.accumulo.core.client.admin.servers.ServerId; +import org.apache.accumulo.core.metrics.Metric; +import org.apache.accumulo.core.metrics.flatbuffers.FMetric; +import org.apache.accumulo.core.process.thrift.MetricResponse; + +import com.google.common.base.Preconditions; +import com.google.common.hash.Hashing; + +/** + * Sums multiple metrics of the same type for a column value + */ +public class MultiSumColumnFactory implements ColumnFactory { + + private final List colFactories; + private final TableData.Column column; + + MultiSumColumnFactory(String label, Metric... metrics) { + Preconditions.checkArgument(metrics.length > 1); + // ensure all metrics are of the same type + Preconditions + .checkArgument(Arrays.stream(metrics).allMatch(m -> m.getType() == metrics[0].getType())); + // ensure all metrics have the same display type + Preconditions.checkArgument( + Arrays.stream(metrics).allMatch(m -> Set.of(Arrays.asList(m.getColumnClasses())) + .equals(Set.of(Arrays.asList(metrics[0].getColumnClasses()))))); + + this.colFactories = Arrays.stream(metrics).map(MetricColumnFactory::new).toList(); + + StringBuilder description = new StringBuilder("A sum of the following metrics :"); + var hasher = Hashing.sha256().newHasher(); + for (int i = 0; i < metrics.length; i++) { + description.append(" "); + description.append((i + 1) + ") " + metrics[i].getDescription()); + if (!metrics[i].getDescription().endsWith(".")) { + description.append("."); + } + hasher.putString(metrics[i].getName(), UTF_8); + } + + var key = hasher.hash().toString(); + + this.column = new TableData.Column(key, label, description.toString(), + colFactories.get(0).getColumn().uiClass()); + } + + @Override + public TableData.Column getColumn() { + return column; + } + + @Override + public Object getRowData(ServerId sid, MetricResponse mr, + Map> serverMetrics) { + Number sum = null; + for (var colf : colFactories) { + sum = add(sum, (Number) colf.getRowData(sid, mr, serverMetrics)); + } + return sum; + } +} diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/RatioColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/RatioColumnFactory.java index 15c4be5f9cd..44a532abb3f 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/RatioColumnFactory.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/RatioColumnFactory.java @@ -26,6 +26,7 @@ import org.apache.accumulo.core.metrics.flatbuffers.FMetric; import org.apache.accumulo.core.process.thrift.MetricResponse; import org.apache.accumulo.monitor.next.views.TableData.Column; +import org.apache.accumulo.monitor.next.views.TableDataFactory.StatType; public class RatioColumnFactory implements ColumnFactory { @@ -55,8 +56,8 @@ public Object getRowData(ServerId sid, MetricResponse mr, return null; } - var numeratorSum = sum(n).doubleValue(); - var denominatorSum = sum(n).doubleValue(); + var numeratorSum = sum(n, StatType.COUNT_OR_VALUE).doubleValue(); + var denominatorSum = sum(d, StatType.COUNT_OR_VALUE).doubleValue(); if (denominatorSum == 0) { return null; diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TableDataFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TableDataFactory.java index e3eb7310f84..d78695bf924 100644 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TableDataFactory.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TableDataFactory.java @@ -79,6 +79,15 @@ public enum TableName { TABLET_SERVERS } + public static class StatType { + public static final String COUNT = "count"; + public static final String VALUE = "value"; + public static final String AVERAGE = "avg"; + public static final String MAX = "max"; + + public static final Predicate COUNT_OR_VALUE = s -> COUNT.equals(s) || VALUE.equals(s); + } + /** * Common columns that are included in every ServersView table */ @@ -183,43 +192,20 @@ public static List columnsFor(TableName table) { case MANAGER_FATE -> managerFateMetrics().forEach(m -> cols.add(new MetricColumnFactory(m))); case MANAGER_COMPACTIONS -> managerCompactionMetrics().forEach(m -> cols.add(new MetricColumnFactory(m))); - case SCAN_SERVERS -> scanServerMetrics().forEach(m -> cols.add(new MetricColumnFactory(m))); + case SCAN_SERVERS -> scanServerColumns(cols); case TABLET_SERVERS -> tabletServerColumns(cols); } return cols; } private static void tabletServerColumns(List cols) { - cols.add(new MetricColumnFactory(Metric.SERVER_IDLE)); - cols.add(new MetricColumnFactory(Metric.LOW_MEMORY)); + + commonColumns(cols); cols.add(new MetricColumnFactory(Metric.TSERVER_TABLETS_ONLINE)); cols.add(new MetricColumnFactory(Metric.TSERVER_TABLETS_LONG_ASSIGNMENTS)); - cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.COMPLETED, - ThreadPoolNames.RPC_POOL.poolName, "Completed RPCs", - "Task completed by the Thrift thread pool")); - cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.QUEUED, - ThreadPoolNames.RPC_POOL.poolName, "Queued RPCs", - "Task queued for the Thrift thread pool")); - - // Scan columns - cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.COMPLETED, - ThreadPoolNames.SCAN_EXECUTOR_PREFIX.poolName, "Completed scans", - "Scan task completed by all scan thread pools")); - cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.QUEUED, - ThreadPoolNames.SCAN_EXECUTOR_PREFIX.poolName, "Queued scans", - "Scan task queued on all scan thread pools")); - cols.add(new MetricColumnFactory(Metric.SCAN_ERRORS)); - cols.add(new MetricColumnFactory(Metric.SCAN_SCANNED_ENTRIES)); - cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS)); - cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES)); - cols.add(new RatioColumnFactory("Index cache hit", - "Ratio of hits/total request for the index block cache", Metric.BLOCKCACHE_INDEX_HITCOUNT, - Metric.BLOCKCACHE_INDEX_REQUESTCOUNT)); - cols.add(new RatioColumnFactory("Data cache hit", - "Ratio of hits/total request for the data block cache", Metric.BLOCKCACHE_DATA_HITCOUNT, - Metric.BLOCKCACHE_DATA_REQUESTCOUNT)); + scanColumns(cols); // Ingest and minc cols.add(new MetricColumnFactory(Metric.TSERVER_INGEST_ENTRIES)); @@ -247,25 +233,69 @@ private static void tabletServerColumns(List cols) { // TODO create scan problems that is a sum of zombie and low memory } + private static void commonColumns(List cols) { + cols.add(new MetricColumnFactory(Metric.SERVER_IDLE)); + cols.add(new MetricColumnFactory(Metric.LOW_MEMORY)); + + cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.COMPLETED, + ThreadPoolNames.RPC_POOL.poolName, "Completed RPCs", + "Task completed by the Thrift thread pool")); + cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.QUEUED, + ThreadPoolNames.RPC_POOL.poolName, "Queued RPCs", + "Task queued for the Thrift thread pool")); + } + + private static void scanColumns(List cols) { + cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.COMPLETED, + ThreadPoolNames.SCAN_EXECUTOR_PREFIX.poolName, "Completed scans", + "Scan task completed by all scan thread pools")); + cols.add(new ExecutorColumnFactory(ExecutorColumnFactory.Type.QUEUED, + ThreadPoolNames.SCAN_EXECUTOR_PREFIX.poolName, "Queued scans", + "Scan task queued on all scan thread pools")); + cols.add(new MultiSumColumnFactory("Scan Problems", Metric.SCAN_ERRORS, + Metric.SCAN_PAUSED_FOR_MEM, Metric.SCAN_RETURN_FOR_MEM)); + cols.add(new MetricColumnFactory(Metric.SCAN_SCANNED_ENTRIES)); + cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS)); + cols.add(new MetricColumnFactory(Metric.SCAN_QUERY_SCAN_RESULTS_BYTES)); + cols.add(new RatioColumnFactory("Index cache hit", + "Ratio of hits/total request for the index block cache", Metric.BLOCKCACHE_INDEX_HITCOUNT, + Metric.BLOCKCACHE_INDEX_REQUESTCOUNT)); + cols.add(new RatioColumnFactory("Data cache hit", + "Ratio of hits/total request for the data block cache", Metric.BLOCKCACHE_DATA_HITCOUNT, + Metric.BLOCKCACHE_DATA_REQUESTCOUNT)); + cols.add(new MetricColumnFactory(Metric.SCAN_OPEN_FILES)); + cols.add(new MetricColumnFactory(Metric.SCAN_YIELDS)); + } + + private static void scanServerColumns(List cols) { + commonColumns(cols); + scanColumns(cols); + cols.add(new CacheSizeColumnFactory(Metric.SCAN_TABLET_METADATA_CACHE.getName(), + "Tablets Cached", "The number of tablets for which a scan server has cached metadata.")); + cols.add(new MetricColumnFactory(Metric.SCAN_RESERVATION_FILES)); + cols.add(new MetricColumnFactory(Metric.SCAN_BUSY_TIMEOUT_COUNT)); + cols.add(new TimerColumnFactory(Metric.SCAN_RESERVATION_TOTAL_TIMER)); + } + public static Map> metricValuesByName(MetricResponse response) { var values = new HashMap>(); if (response == null || response.getMetrics() == null || response.getMetrics().isEmpty()) { return values; } + FTag tag = new FTag(); for (var binary : response.getMetrics()) { var metric = FMetric.getRootAsFMetric(binary); - var metricStatistic = extractStatistic(metric); - if (metricStatistic == null || metricStatistic.equals("value") - || metricStatistic.equals("count")) { + var metricStatistic = extractStatistic(metric, tag); + if (metricStatistic == null || metricStatistic.equals(StatType.VALUE) + || metricStatistic.equals(StatType.COUNT) || metricStatistic.equals(StatType.AVERAGE)) { values.computeIfAbsent(metric.name(), m -> new ArrayList<>()).add(metric); } } return values; } - public static String extractStatistic(FMetric metric) { - FTag tag = new FTag(); + public static String extractStatistic(FMetric metric, FTag tag) { for (int i = 0; i < metric.tagsLength(); i++) { tag = metric.tags(tag, i); if (MetricResponseWrapper.STATISTIC_TAG.equals(tag.key())) { @@ -339,11 +369,6 @@ private static List managerCompactionMetrics() { && !m.getName().startsWith(MINC_COMPACTION.getPrefix()), MetricDocSection.COMPACTION); } - private static List scanServerMetrics() { - return metricList(null, MetricDocSection.GENERAL_SERVER, MetricDocSection.SCAN_SERVER, - MetricDocSection.SCAN, MetricDocSection.BLOCK_CACHE); - } - /** * @return all the metrics for the given sections */ diff --git a/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TimerColumnFactory.java b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TimerColumnFactory.java new file mode 100644 index 00000000000..12344210e52 --- /dev/null +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/next/views/TimerColumnFactory.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.monitor.next.views; + +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.client.admin.servers.ServerId; +import org.apache.accumulo.core.metrics.Metric; +import org.apache.accumulo.core.metrics.flatbuffers.FMetric; +import org.apache.accumulo.core.metrics.flatbuffers.FTag; +import org.apache.accumulo.core.process.thrift.MetricResponse; +import org.apache.accumulo.monitor.next.SystemInformation; +import org.apache.accumulo.monitor.next.views.TableDataFactory.StatType; + +import com.google.common.base.Preconditions; + +public class TimerColumnFactory implements ColumnFactory { + + private final TableData.Column column; + + public TimerColumnFactory(Metric metric) { + Preconditions.checkArgument(metric.getType() == Metric.MetricType.TIMER); + + this.column = new TableData.Column(metric.getName(), metric.getColumnHeader(), + metric.getColumnDescription(), Metric.MonitorCssClass.DURATION.getCssClass()); + + } + + @Override + public TableData.Column getColumn() { + return column; + } + + @Override + public Object getRowData(ServerId sid, MetricResponse mr, + Map> serverMetrics) { + var metrics = serverMetrics.getOrDefault(column.key(), List.of()); + FTag ftag = new FTag(); + for (var metric : metrics) { + for (int i = 0; i < metric.tagsLength(); i++) { + metric.tags(ftag, i); + } + String statVal = TableDataFactory.extractStatistic(metric, ftag); + if (StatType.AVERAGE.equals(statVal)) { + // The average time is in seconds, convert it to millis for the monitor front end code. + return SystemInformation.getMetricValue(metric).doubleValue() * 1000; + } + } + return null; + } +} diff --git a/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/functions.js b/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/functions.js index a449eb78d5c..e315d206cd9 100644 --- a/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/functions.js +++ b/server/monitor/src/main/resources/org/apache/accumulo/monitor/resources/js/functions.js @@ -228,13 +228,17 @@ function timeDuration(time) { var ms, sec, min, hr, day, yr; ms = sec = min = hr = day = yr = -1; - time = Math.floor(time); - - // If time is 0 return a dash - if (time === 0) { - return '—'; + if (Math.round(time) === 0) { + var microsecs = Math.round(time * 1000) + if(microsecs == 0){ + return '0μs'; + }else{ + return microsecs+'μs'; + } } + time = Math.round(time); + // Obtains the milliseconds, if time is 0, return milliseconds, and units ms = time % 1000; time = Math.floor(time / 1000); diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java index bcfc99369f0..40b552ce1e2 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java @@ -381,7 +381,7 @@ public void run() { scanMetrics = new TabletServerScanMetrics(resourceManager::getOpenFiles); sessionManager.setZombieCountConsumer(scanMetrics::setZombieScanThreads); - scanServerMetrics = new ScanServerMetrics(tabletMetadataCache); + scanServerMetrics = new ScanServerMetrics(tabletMetadataCache, reservedFiles::size); blockCacheMetrics = new BlockCacheMetrics(resourceManager.getIndexCache(), resourceManager.getDataCache(), resourceManager.getSummaryCache()); @@ -668,7 +668,7 @@ public void close() { */ private Map reserveFilesInner(Collection extents, long myReservationId, Set failures) throws AccumuloException { - // RFS is an acronym for Reference files for scan + // RFFS is an acronym for Reference files for scan LOG.debug("RFFS {} ensuring files are referenced for scan of extents {}", myReservationId, extents); diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java index 1f558021e0a..d0710cf8264 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java @@ -20,12 +20,14 @@ import static org.apache.accumulo.core.metrics.Metric.SCAN_BUSY_TIMEOUT_COUNT; import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_CONFLICT_COUNTER; +import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_FILES; import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_TOTAL_TIMER; import static org.apache.accumulo.core.metrics.Metric.SCAN_RESERVATION_WRITEOUT_TIMER; import static org.apache.accumulo.core.metrics.Metric.SCAN_TABLET_METADATA_CACHE; import java.time.Duration; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.IntSupplier; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.metadata.schema.TabletMetadata; @@ -36,12 +38,14 @@ import com.google.common.base.Preconditions; import io.micrometer.core.instrument.FunctionCounter; +import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Timer; import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics; public class ScanServerMetrics implements MetricsProducer { + private final IntSupplier reservedFilesSupplier; private Timer totalReservationTimer = NoopMetrics.useNoopTimer(); private Timer writeOutReservationTimer = NoopMetrics.useNoopTimer(); private final AtomicLong busyTimeoutCount = new AtomicLong(0); @@ -49,8 +53,10 @@ public class ScanServerMetrics implements MetricsProducer { private final LoadingCache tabletMetadataCache; - public ScanServerMetrics(final LoadingCache tabletMetadataCache) { + public ScanServerMetrics(final LoadingCache tabletMetadataCache, + IntSupplier reservedFilesSupplier) { this.tabletMetadataCache = tabletMetadataCache; + this.reservedFilesSupplier = reservedFilesSupplier; } @Override @@ -65,6 +71,8 @@ public void registerMetrics(MeterRegistry registry) { .builder(SCAN_RESERVATION_CONFLICT_COUNTER.getName(), reservationConflictCount, AtomicLong::get) .description(SCAN_RESERVATION_CONFLICT_COUNTER.getDescription()).register(registry); + Gauge.builder(SCAN_RESERVATION_FILES.getName(), () -> (double) reservedFilesSupplier.getAsInt()) + .description(SCAN_RESERVATION_FILES.getDescription()).register(registry); if (tabletMetadataCache != null) { Preconditions.checkState(tabletMetadataCache.policy().isRecordingStats(), diff --git a/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java b/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java index 705ffd0bdef..e5fc8da1fcd 100644 --- a/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java +++ b/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java @@ -99,7 +99,6 @@ public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoo // that will be configured to push all metrics to the sink we started. cfg.setProperty(Property.GENERAL_MICROMETER_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_USER_TAGS, "tag1=value1,tag2=value2"); - cfg.setProperty(Property.GENERAL_MICROMETER_CACHE_METRICS_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_JVM_METRICS_ENABLED, "true"); cfg.setProperty("general.custom.metrics.opts.logging.step", "10s"); String clazzList = LoggingMeterRegistryFactory.class.getName() + "," diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java index 255fb463c17..16b179b6d0a 100644 --- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java @@ -126,7 +126,6 @@ protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSit // that will be configured to push all metrics to the sink we started. cfg.setProperty(Property.GENERAL_MICROMETER_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_USER_TAGS, "tag1=value1,tag2=value2"); - cfg.setProperty(Property.GENERAL_MICROMETER_CACHE_METRICS_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_JVM_METRICS_ENABLED, "true"); cfg.setProperty("general.custom.metrics.opts.logging.step", "10s"); String clazzList = LoggingMeterRegistryFactory.class.getName() + "," diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsThriftRpcIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsThriftRpcIT.java index 6cc46a29f91..805a3d6f727 100644 --- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsThriftRpcIT.java +++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsThriftRpcIT.java @@ -71,7 +71,6 @@ protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSit cfg.setProperty(Property.MANAGER_FATE_METRICS_MIN_UPDATE_INTERVAL, "1s"); cfg.setProperty(Property.GENERAL_MICROMETER_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_USER_TAGS, "tag1=value1,tag2=value2"); - cfg.setProperty(Property.GENERAL_MICROMETER_CACHE_METRICS_ENABLED, "true"); cfg.setProperty(Property.GENERAL_MICROMETER_JVM_METRICS_ENABLED, "true"); cfg.getClusterServerConfiguration().setNumDefaultCompactors(1); cfg.getClusterServerConfiguration().setNumDefaultScanServers(1);