Skip to content

Commit cebecd4

Browse files
committed
fix: separate rejected and aborted proving jobs (#20777)
This PR tracks rejected and aborted jobs as separate counters, enabling us to set up alerts when any proofs fail. This PR has to be backported; otherwise, we'll get alerts when jobs get cancelled as well.
1 parent bf6077b commit cebecd4

3 files changed

Lines changed: 21 additions & 2 deletions

File tree

yarn-project/prover-client/src/proving_broker/proving_broker.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer, Pr
314314
// notify listeners of the cancellation
315315
if (!this.resultsCache.has(id)) {
316316
this.logger.info(`Cancelling job id=${id}`, { provingJobId: id });
317-
await this.#reportProvingJobError(id, 'Aborted', false);
317+
await this.#reportProvingJobError(id, 'Aborted', false, undefined, true);
318318
}
319319
}
320320

@@ -395,6 +395,7 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer, Pr
395395
err: string,
396396
retry = false,
397397
filter?: ProvingJobFilter,
398+
aborted = false,
398399
): Promise<GetProvingJobResponse | undefined> {
399400
const info = this.inProgress.get(id);
400401
const item = this.jobsCache.get(id);
@@ -455,7 +456,11 @@ export class ProvingBroker implements ProvingJobProducer, ProvingJobConsumer, Pr
455456
this.promises.get(id)!.resolve(result);
456457
this.completedJobNotifications.push(id);
457458

458-
this.instrumentation.incRejectedJobs(item.type);
459+
if (aborted) {
460+
this.instrumentation.incAbortedJobs(item.type);
461+
} else {
462+
this.instrumentation.incRejectedJobs(item.type);
463+
}
459464
if (info) {
460465
const duration = this.msTimeSource() - info.startedAt;
461466
this.instrumentation.recordJobDuration(item.type, duration);

yarn-project/prover-client/src/proving_broker/proving_broker_instrumentation.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export class ProvingBrokerInstrumentation {
1818
private activeJobs: ObservableGauge;
1919
private resolvedJobs: UpDownCounter;
2020
private rejectedJobs: UpDownCounter;
21+
private abortedJobs: UpDownCounter;
2122
private timedOutJobs: UpDownCounter;
2223
private cachedJobs: UpDownCounter;
2324
private totalJobs: UpDownCounter;
@@ -39,6 +40,8 @@ export class ProvingBrokerInstrumentation {
3940

4041
this.rejectedJobs = createUpDownCounterWithDefault(meter, Metrics.PROVING_QUEUE_REJECTED_JOBS, provingJobAttrs);
4142

43+
this.abortedJobs = createUpDownCounterWithDefault(meter, Metrics.PROVING_QUEUE_ABORTED_JOBS, provingJobAttrs);
44+
4245
this.retriedJobs = createUpDownCounterWithDefault(meter, Metrics.PROVING_QUEUE_RETRIED_JOBS, provingJobAttrs);
4346

4447
this.timedOutJobs = createUpDownCounterWithDefault(meter, Metrics.PROVING_QUEUE_TIMED_OUT_JOBS, provingJobAttrs);
@@ -72,6 +75,12 @@ export class ProvingBrokerInstrumentation {
7275
});
7376
}
7477

78+
incAbortedJobs(proofType: ProvingRequestType) {
79+
this.abortedJobs.add(1, {
80+
[Attributes.PROVING_JOB_TYPE]: ProvingRequestType[proofType],
81+
});
82+
}
83+
7584
incRetriedJobs(proofType: ProvingRequestType) {
7685
this.retriedJobs.add(1, {
7786
[Attributes.PROVING_JOB_TYPE]: ProvingRequestType[proofType],

yarn-project/telemetry-client/src/metrics.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,11 @@ export const PROVING_QUEUE_REJECTED_JOBS: MetricDefinition = {
10411041
description: 'Number of rejected proving jobs',
10421042
valueType: ValueType.INT,
10431043
};
1044+
export const PROVING_QUEUE_ABORTED_JOBS: MetricDefinition = {
1045+
name: 'aztec.proving_queue.aborted_jobs_count',
1046+
description: 'Number of aborted proving jobs',
1047+
valueType: ValueType.INT,
1048+
};
10441049
export const PROVING_QUEUE_RETRIED_JOBS: MetricDefinition = {
10451050
name: 'aztec.proving_queue.retried_jobs_count',
10461051
description: 'Number of retried proving jobs',

0 commit comments

Comments
 (0)