Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 77 additions & 12 deletions src/common/telemetry/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ export enum EventNames {
*/
MANAGER_REGISTRATION_FAILED = 'MANAGER_REGISTRATION.FAILED',
/**
* Telemetry event fired when the setup block appears to be hung.
* A watchdog timer fires after a deadline; if the setup completes normally,
* the timer is cancelled and this event never fires.
* Watchdog event fired when setup appears hung. Cancelled if setup completes normally.
* Properties:
* - failureStage: string (which phase was in progress when the watchdog fired)
* - failureStage: which phase was in progress when the watchdog fired
* - globalScopeDeferred: distinguishes a real foreground hang from the benign 120s
* background global-scope scan. String union (not boolean) because the sender drops
* undefined. Values: 'deferred' | 'not_deferred' | 'unknown'.
* Measures:
* - duration: total elapsed since activation
* - stageDuration: elapsed in the current stage
Expand All @@ -86,6 +87,14 @@ export enum EventNames {
* - errorType: string (classified error category, on failure only)
*/
PET_INIT_DURATION = 'PET.INIT_DURATION',
/**
* Fired once per activation. Lets us slice every other PET event by which binary
* version was running — important since PET evolves independently of the extension.
* Properties:
* - version: output of `pet --version` (e.g. '0.1.0'), or 'unknown' on failure/timeout
* - source: 'envs_extension' | 'python_extension' — which extension shipped the binary
*/
PET_VERSION = 'PET.VERSION',
/**
* Telemetry event fired when applyInitialEnvironmentSelection begins.
* Signals that all managers are registered and env selection is starting.
Expand Down Expand Up @@ -146,12 +155,16 @@ export enum EventNames {
* Telemetry event for a PET refresh attempt (the core discovery RPC call).
* Properties:
* - result: 'success' | 'timeout' | 'error'
* - envCount: number (environments returned via notifications)
* - unresolvedCount: number (envs that needed follow-up resolve calls)
* - workspaceDirCount: number (workspace directories sent in configure)
* - searchPathCount: number (extra search paths sent in configure)
* - attempt: number (0 = first try, 1 = retry)
* - errorType: string (classified error category, on failure only)
* - envCount, unresolvedCount, workspaceDirCount, searchPathCount: number
* - attempt: 0 = first try, 1 = retry
* - errorType: classified error category, on failure only
* - locatorsJson: JSON-serialized Record<locatorName, ms>. Locator set is platform-dependent
* so a flat blob is more practical than a fixed schema. Parse with parse_json() in Kusto.
* Measures (numeric; phases run in parallel so sum may exceed total wall-clock):
* - breakdownLocators: ms in locator plugins
* - breakdownPathEnv: ms scanning PATH env var entries (not a file path)
* - breakdownGlobalVirtualEnvs: ms scanning global virtualenv dirs
* - breakdownWorkspaces: ms scanning workspace dirs
*/
PET_REFRESH = 'PET.REFRESH',
/**
Expand All @@ -166,9 +179,15 @@ export enum EventNames {
/**
* Telemetry event for PET process restart attempts.
* Properties:
* - attempt: number (1-based restart attempt number)
* - attempt: 1-based restart attempt number
* - result: 'success' | 'error'
* - errorType: string (classified error category, on failure only)
* - errorType: classified error category, on failure only
* - triggerReason: why the restart was needed (lets us separate crashes from
* timeout-induced kills; the most specific reason wins — rpc_* recorded before a
* kill is not overwritten by the subsequent exit event). Values:
* rpc_connection_error | rpc_resolve_timeout | rpc_refresh_timeout |
* rpc_configure_timeout | process_exit:<code>:<signal> | process_error |
* start_failed | unknown
*/
PET_PROCESS_RESTART = 'PET.PROCESS_RESTART',
/**
Expand Down Expand Up @@ -391,12 +410,20 @@ export interface IEventNamePropertyMapping {
/* __GDPR__
"setup.hang_detected": {
"failureStage": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "StellaHuang95" },
"globalScopeDeferred": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
"<stageDuration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
}
*/
[EventNames.SETUP_HANG_DETECTED]: {
failureStage: string;
/**
* Distinguishes a real foreground hang from the benign 120s background global-scope scan.
* - 'deferred': workspace env resolved; global scope ran in background.
* - 'not_deferred': no workspace env; global scope was awaited as primary fallback.
* - 'unknown': hang fired before env-selection reached the global-scope decision.
*/
globalScopeDeferred: 'deferred' | 'not_deferred' | 'unknown';
};

/* __GDPR__
Expand All @@ -422,6 +449,19 @@ export interface IEventNamePropertyMapping {
errorType?: string;
};

/* __GDPR__
"pet.version": {
"version": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }
}
*/
[EventNames.PET_VERSION]: {
/** Version string reported by `pet --version` (e.g. '0.1.0'), or 'unknown' if the lookup failed. */
version: string;
/** Which extension shipped the PET binary that's being used. */
source: 'envs_extension' | 'python_extension';
};

/* __GDPR__
"env_selection.started": {
"registeredManagerCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
Expand Down Expand Up @@ -515,6 +555,11 @@ export interface IEventNamePropertyMapping {
"searchPathCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"breakdownLocators": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" },
"breakdownPathEnv": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" },
"breakdownGlobalVirtualEnvs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" },
"breakdownWorkspaces": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" },
"locatorsJson": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "owner": "eleanorjboyd" },
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
}
*/
Expand All @@ -526,6 +571,17 @@ export interface IEventNamePropertyMapping {
searchPathCount?: number;
attempt: number;
errorType?: string;
// breakdown* fields go through the measures payload (numeric); listed here for GDPR only.
/** ms in the Locators phase. */
breakdownLocators?: number;
/** ms walking PATH env var entries (not a file path). */
breakdownPathEnv?: number;
/** ms scanning global virtual-env dirs. */
breakdownGlobalVirtualEnvs?: number;
/** ms scanning workspace dirs. */
breakdownWorkspaces?: number;
/** JSON-serialized Record<locatorName, ms>. Parse with parse_json() in Kusto. */
locatorsJson?: string;
};

/* __GDPR__
Expand All @@ -549,13 +605,22 @@ export interface IEventNamePropertyMapping {
"attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" },
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"triggerReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
}
*/
[EventNames.PET_PROCESS_RESTART]: {
attempt: number;
result: 'success' | 'error';
errorType?: string;
/**
* Why the restart was needed. The most specific reason wins (an rpc_* value recorded
* before the kill is not overwritten by the subsequent exit/error event).
* Values: rpc_connection_error | rpc_resolve_timeout | rpc_refresh_timeout |
* rpc_configure_timeout | process_exit:<code>:<signal> | process_error |
* start_failed | unknown.
*/
triggerReason: string;
};

/* __GDPR__
Expand Down
34 changes: 31 additions & 3 deletions src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,12 @@ import { collectEnvironmentInfo, getEnvManagerAndPackageManagerConfigLevels, run
import { EnvironmentManagers, ProjectCreators, PythonProjectManager } from './internal.api';
import { registerSystemPythonFeatures } from './managers/builtin/main';
import { SysPythonManager } from './managers/builtin/sysPythonManager';
import { createNativePythonFinder, NativePythonFinder } from './managers/common/nativePythonFinder';
import {
createNativePythonFinder,
getNativePythonToolsPathAndSource,
getNativePythonToolsVersion,
NativePythonFinder,
} from './managers/common/nativePythonFinder';
import { IDisposable } from './managers/common/types';
import { registerCondaFeatures } from './managers/conda/main';
import { registerPipenvFeatures } from './managers/pipenv/main';
Expand Down Expand Up @@ -553,6 +558,9 @@ export async function activate(context: ExtensionContext): Promise<PythonEnviron
setImmediate(async () => {
let failureStage = 'nativeFinder';
const stageWatch = new StopWatch();
// Mutable ref so the hang watchdog can report whether global scope was deferred
// even if it fires mid-envSelection before applyInitialEnvironmentSelection returns.
const globalScopeDeferredRef: { value: 'deferred' | 'not_deferred' | 'unknown' } = { value: 'unknown' };
// Watchdog: fires if setup hasn't completed within 120s, indicating a likely hang
const SETUP_HANG_TIMEOUT_MS = 120_000;
let hangWatchdogActive = true;
Expand All @@ -572,7 +580,7 @@ export async function activate(context: ExtensionContext): Promise<PythonEnviron
sendTelemetryEvent(
EventNames.SETUP_HANG_DETECTED,
{ duration: start.elapsedTime, stageDuration: stageWatch.elapsedTime },
{ failureStage },
{ failureStage, globalScopeDeferred: globalScopeDeferredRef.value },
);
Comment thread
eleanorjboyd marked this conversation as resolved.
}, SETUP_HANG_TIMEOUT_MS);
context.subscriptions.push({ dispose: clearHangWatchdog });
Expand All @@ -583,6 +591,19 @@ export async function activate(context: ExtensionContext): Promise<PythonEnviron
try {
nativeFinder = await createNativePythonFinder(outputChannel, api, context);
sendTelemetryEvent(EventNames.PET_INIT_DURATION, petStart.elapsedTime, { result: 'success' });
// Fire-and-forget: report the bundled PET binary version so other PET telemetry
// can be sliced by version. Don't block activation on this.
void getNativePythonToolsPathAndSource()
.then(async ({ toolPath, source }) => {
const version = await getNativePythonToolsVersion(toolPath);
sendTelemetryEvent(EventNames.PET_VERSION, undefined, { version, source });
})
.catch(() => {
sendTelemetryEvent(EventNames.PET_VERSION, undefined, {
version: 'unknown',
source: 'python_extension',
});
});
} catch (petError) {
sendTelemetryEvent(
EventNames.PET_INIT_DURATION,
Expand Down Expand Up @@ -618,7 +639,14 @@ export async function activate(context: ExtensionContext): Promise<PythonEnviron

failureStage = 'envSelection';
stageWatch.reset();
await applyInitialEnvironmentSelection(envManagers, projectManager, nativeFinder, api, start.elapsedTime);
await applyInitialEnvironmentSelection(
envManagers,
projectManager,
nativeFinder,
api,
start.elapsedTime,
globalScopeDeferredRef,
);

// Register manager-agnostic terminal watcher for package-modifying commands
failureStage = 'terminalWatcher';
Expand Down
7 changes: 7 additions & 0 deletions src/features/interpreterSelection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ export async function applyInitialEnvironmentSelection(
nativeFinder: NativePythonFinder,
api: PythonEnvironmentApi,
activationToReadyDurationMs?: number,
globalScopeDeferredRef?: { value: 'deferred' | 'not_deferred' | 'unknown' },
): Promise<void> {
const folders = getWorkspaceFolders() ?? [];
traceInfo(
Expand Down Expand Up @@ -377,6 +378,9 @@ export async function applyInitialEnvironmentSelection(
if (workspaceFolderResolved) {
// Defer global scope so it doesn't block post-selection startup.
traceInfo('[interpreterSelection] Workspace env resolved, deferring global scope to background');
if (globalScopeDeferredRef) {
globalScopeDeferredRef.value = 'deferred';
}
resolveGlobalScope()
.then(async (globalErrors) => {
if (globalErrors.length > 0) {
Expand All @@ -386,6 +390,9 @@ export async function applyInitialEnvironmentSelection(
.catch((err) => traceError(`[interpreterSelection] Background global scope resolution failed: ${err}`));
} else {
// No workspace folder resolved — global scope is the primary fallback, must await.
if (globalScopeDeferredRef) {
globalScopeDeferredRef.value = 'not_deferred';
}
Comment thread
eleanorjboyd marked this conversation as resolved.
const globalErrors = await resolveGlobalScope();
allErrors.push(...globalErrors);
}
Expand Down
Loading
Loading