diff --git a/src/common/telemetry/constants.ts b/src/common/telemetry/constants.ts index ea021eac..35f28217 100644 --- a/src/common/telemetry/constants.ts +++ b/src/common/telemetry/constants.ts @@ -60,11 +60,12 @@ export enum EventNames { */ MANAGER_REGISTRATION_FAILED = 'MANAGER_REGISTRATION.FAILED', /** - * Telemetry event fired when the setup block appears to be hung. - * A watchdog timer fires after a deadline; if the setup completes normally, - * the timer is cancelled and this event never fires. + * Watchdog event fired when setup appears hung. Cancelled if setup completes normally. * Properties: - * - failureStage: string (which phase was in progress when the watchdog fired) + * - failureStage: which phase was in progress when the watchdog fired + * - globalScopeDeferred: distinguishes a real foreground hang from the benign 120s + * background global-scope scan. String union (not boolean) because the sender drops + * undefined. Values: 'deferred' | 'not_deferred' | 'unknown'. * Measures: * - duration: total elapsed since activation * - stageDuration: elapsed in the current stage @@ -86,6 +87,14 @@ export enum EventNames { * - errorType: string (classified error category, on failure only) */ PET_INIT_DURATION = 'PET.INIT_DURATION', + /** + * Fired once per activation. Lets us slice every other PET event by which binary + * version was running — important since PET evolves independently of the extension. + * Properties: + * - version: output of `pet --version` (e.g. '0.1.0'), or 'unknown' on failure/timeout + * - source: 'envs_extension' | 'python_extension' — which extension shipped the binary + */ + PET_VERSION = 'PET.VERSION', /** * Telemetry event fired when applyInitialEnvironmentSelection begins. * Signals that all managers are registered and env selection is starting. @@ -146,12 +155,16 @@ export enum EventNames { * Telemetry event for a PET refresh attempt (the core discovery RPC call). * Properties: * - result: 'success' | 'timeout' | 'error' - * - envCount: number (environments returned via notifications) - * - unresolvedCount: number (envs that needed follow-up resolve calls) - * - workspaceDirCount: number (workspace directories sent in configure) - * - searchPathCount: number (extra search paths sent in configure) - * - attempt: number (0 = first try, 1 = retry) - * - errorType: string (classified error category, on failure only) + * - envCount, unresolvedCount, workspaceDirCount, searchPathCount: number + * - attempt: 0 = first try, 1 = retry + * - errorType: classified error category, on failure only + * - locatorsJson: JSON-serialized Record. Locator set is platform-dependent + * so a flat blob is more practical than a fixed schema. Parse with parse_json() in Kusto. + * Measures (numeric; phases run in parallel so sum may exceed total wall-clock): + * - breakdownLocators: ms in locator plugins + * - breakdownPathEnv: ms scanning PATH env var entries (not a file path) + * - breakdownGlobalVirtualEnvs: ms scanning global virtualenv dirs + * - breakdownWorkspaces: ms scanning workspace dirs */ PET_REFRESH = 'PET.REFRESH', /** @@ -166,9 +179,15 @@ export enum EventNames { /** * Telemetry event for PET process restart attempts. * Properties: - * - attempt: number (1-based restart attempt number) + * - attempt: 1-based restart attempt number * - result: 'success' | 'error' - * - errorType: string (classified error category, on failure only) + * - errorType: classified error category, on failure only + * - triggerReason: why the restart was needed (lets us separate crashes from + * timeout-induced kills; the most specific reason wins — rpc_* recorded before a + * kill is not overwritten by the subsequent exit event). Values: + * rpc_connection_error | rpc_resolve_timeout | rpc_refresh_timeout | + * rpc_configure_timeout | process_exit:: | process_error | + * start_failed | unknown */ PET_PROCESS_RESTART = 'PET.PROCESS_RESTART', /** @@ -391,12 +410,20 @@ export interface IEventNamePropertyMapping { /* __GDPR__ "setup.hang_detected": { "failureStage": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "StellaHuang95" }, + "globalScopeDeferred": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, "": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }, "": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" } } */ [EventNames.SETUP_HANG_DETECTED]: { failureStage: string; + /** + * Distinguishes a real foreground hang from the benign 120s background global-scope scan. + * - 'deferred': workspace env resolved; global scope ran in background. + * - 'not_deferred': no workspace env; global scope was awaited as primary fallback. + * - 'unknown': hang fired before env-selection reached the global-scope decision. + */ + globalScopeDeferred: 'deferred' | 'not_deferred' | 'unknown'; }; /* __GDPR__ @@ -422,6 +449,19 @@ export interface IEventNamePropertyMapping { errorType?: string; }; + /* __GDPR__ + "pet.version": { + "version": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, + "source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" } + } + */ + [EventNames.PET_VERSION]: { + /** Version string reported by `pet --version` (e.g. '0.1.0'), or 'unknown' if the lookup failed. */ + version: string; + /** Which extension shipped the PET binary that's being used. */ + source: 'envs_extension' | 'python_extension'; + }; + /* __GDPR__ "env_selection.started": { "registeredManagerCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }, @@ -515,6 +555,11 @@ export interface IEventNamePropertyMapping { "searchPathCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }, "attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }, "errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, + "breakdownLocators": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" }, + "breakdownPathEnv": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" }, + "breakdownGlobalVirtualEnvs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" }, + "breakdownWorkspaces": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "owner": "eleanorjboyd" }, + "locatorsJson": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "owner": "eleanorjboyd" }, "": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" } } */ @@ -526,6 +571,17 @@ export interface IEventNamePropertyMapping { searchPathCount?: number; attempt: number; errorType?: string; + // breakdown* fields go through the measures payload (numeric); listed here for GDPR only. + /** ms in the Locators phase. */ + breakdownLocators?: number; + /** ms walking PATH env var entries (not a file path). */ + breakdownPathEnv?: number; + /** ms scanning global virtual-env dirs. */ + breakdownGlobalVirtualEnvs?: number; + /** ms scanning workspace dirs. */ + breakdownWorkspaces?: number; + /** JSON-serialized Record. Parse with parse_json() in Kusto. */ + locatorsJson?: string; }; /* __GDPR__ @@ -549,6 +605,7 @@ export interface IEventNamePropertyMapping { "attempt": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }, "result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, "errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, + "triggerReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" }, "": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" } } */ @@ -556,6 +613,14 @@ export interface IEventNamePropertyMapping { attempt: number; result: 'success' | 'error'; errorType?: string; + /** + * Why the restart was needed. The most specific reason wins (an rpc_* value recorded + * before the kill is not overwritten by the subsequent exit/error event). + * Values: rpc_connection_error | rpc_resolve_timeout | rpc_refresh_timeout | + * rpc_configure_timeout | process_exit:: | process_error | + * start_failed | unknown. + */ + triggerReason: string; }; /* __GDPR__ diff --git a/src/extension.ts b/src/extension.ts index ec8a24cd..64ffa07f 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -96,7 +96,12 @@ import { collectEnvironmentInfo, getEnvManagerAndPackageManagerConfigLevels, run import { EnvironmentManagers, ProjectCreators, PythonProjectManager } from './internal.api'; import { registerSystemPythonFeatures } from './managers/builtin/main'; import { SysPythonManager } from './managers/builtin/sysPythonManager'; -import { createNativePythonFinder, NativePythonFinder } from './managers/common/nativePythonFinder'; +import { + createNativePythonFinder, + getNativePythonToolsPathAndSource, + getNativePythonToolsVersion, + NativePythonFinder, +} from './managers/common/nativePythonFinder'; import { IDisposable } from './managers/common/types'; import { registerCondaFeatures } from './managers/conda/main'; import { registerPipenvFeatures } from './managers/pipenv/main'; @@ -553,6 +558,9 @@ export async function activate(context: ExtensionContext): Promise { let failureStage = 'nativeFinder'; const stageWatch = new StopWatch(); + // Mutable ref so the hang watchdog can report whether global scope was deferred + // even if it fires mid-envSelection before applyInitialEnvironmentSelection returns. + const globalScopeDeferredRef: { value: 'deferred' | 'not_deferred' | 'unknown' } = { value: 'unknown' }; // Watchdog: fires if setup hasn't completed within 120s, indicating a likely hang const SETUP_HANG_TIMEOUT_MS = 120_000; let hangWatchdogActive = true; @@ -572,7 +580,7 @@ export async function activate(context: ExtensionContext): Promise { + const version = await getNativePythonToolsVersion(toolPath); + sendTelemetryEvent(EventNames.PET_VERSION, undefined, { version, source }); + }) + .catch(() => { + sendTelemetryEvent(EventNames.PET_VERSION, undefined, { + version: 'unknown', + source: 'python_extension', + }); + }); } catch (petError) { sendTelemetryEvent( EventNames.PET_INIT_DURATION, @@ -618,7 +639,14 @@ export async function activate(context: ExtensionContext): Promise { const folders = getWorkspaceFolders() ?? []; traceInfo( @@ -377,6 +378,9 @@ export async function applyInitialEnvironmentSelection( if (workspaceFolderResolved) { // Defer global scope so it doesn't block post-selection startup. traceInfo('[interpreterSelection] Workspace env resolved, deferring global scope to background'); + if (globalScopeDeferredRef) { + globalScopeDeferredRef.value = 'deferred'; + } resolveGlobalScope() .then(async (globalErrors) => { if (globalErrors.length > 0) { @@ -386,6 +390,9 @@ export async function applyInitialEnvironmentSelection( .catch((err) => traceError(`[interpreterSelection] Background global scope resolution failed: ${err}`)); } else { // No workspace folder resolved — global scope is the primary fallback, must await. + if (globalScopeDeferredRef) { + globalScopeDeferredRef.value = 'not_deferred'; + } const globalErrors = await resolveGlobalScope(); allErrors.push(...globalErrors); } diff --git a/src/managers/common/nativePythonFinder.ts b/src/managers/common/nativePythonFinder.ts index 6efed7af..33ebbe3c 100644 --- a/src/managers/common/nativePythonFinder.ts +++ b/src/managers/common/nativePythonFinder.ts @@ -85,12 +85,21 @@ export class ConfigureRetryState { } } +export type NativePythonToolsSource = 'envs_extension' | 'python_extension'; + export async function getNativePythonToolsPath(): Promise { + return (await getNativePythonToolsPathAndSource()).toolPath; +} + +export async function getNativePythonToolsPathAndSource(): Promise<{ + toolPath: string; + source: NativePythonToolsSource; +}> { const envsExt = getExtension(ENVS_EXTENSION_ID); if (envsExt) { const petPath = path.join(envsExt.extensionPath, 'python-env-tools', 'bin', isWindows() ? 'pet.exe' : 'pet'); if (await fs.pathExists(petPath)) { - return petPath; + return { toolPath: petPath, source: 'envs_extension' }; } } @@ -99,7 +108,64 @@ export async function getNativePythonToolsPath(): Promise { throw new Error('Python extension not found'); } - return path.join(python.extensionPath, 'python-env-tools', 'bin', isWindows() ? 'pet.exe' : 'pet'); + return { + toolPath: path.join(python.extensionPath, 'python-env-tools', 'bin', isWindows() ? 'pet.exe' : 'pet'), + source: 'python_extension', + }; +} + +/** + * Runs `pet --version` and returns the parsed version string (e.g. '0.1.0'). + * Returns 'unknown' if the command fails, times out, or the output can't be parsed. + */ +export async function getNativePythonToolsVersion(toolPath: string, timeoutMs: number = 5_000): Promise { + return new Promise((resolve) => { + let settled = false; + const settle = (value: string) => { + if (settled) { + return; + } + settled = true; + resolve(value); + }; + try { + const proc = spawnProcess(toolPath, ['--version'], { stdio: 'pipe' }); + let stdout = ''; + const timer = setTimeout(() => { + try { + proc.kill('SIGTERM'); + // Force kill after a short grace period if still running. + setTimeout(() => { + if (proc.exitCode === null) { + try { + proc.kill('SIGKILL'); + } catch { + // ignore + } + } + }, 500); + } catch { + // ignore + } + settle('unknown'); + }, timeoutMs); + proc.stdout?.on('data', (data: Buffer) => { + stdout += data.toString(); + }); + proc.on('error', () => { + clearTimeout(timer); + settle('unknown'); + }); + proc.on('close', () => { + clearTimeout(timer); + // Output looks like "pet 0.1.0\n" — extract the version token. + const match = stdout.match(/\b\d+\.\d+\.\d+\S*/); + settle(match ? match[0] : 'unknown'); + }); + } catch { + settle('unknown'); + } + }); } export interface NativeEnvInfo { @@ -182,6 +248,23 @@ interface RefreshOptions { searchPaths?: string[]; } +/** Performance breakdown sent by PET via the `telemetry` notification after a refresh. */ +interface RefreshPerformance { + total: number; + /** Phase name (Locators | Path | GlobalVirtualEnvs | Workspaces) → wall-clock ms */ + breakdown: Record; + /** Locator name (Conda | WindowsRegistry | …) → wall-clock ms; only ran locators are present */ + locators: Record; +} + +/** Params shape of the PET `telemetry` JSON-RPC notification. */ +interface PetTelemetryNotification { + event: string; + data: { + refreshPerformance?: RefreshPerformance; + }; +} + /** * Error thrown when a JSON-RPC request times out. */ @@ -237,6 +320,7 @@ class NativePythonFinderImpl implements NativePythonFinder { private startFailed: boolean = false; private restartAttempts: number = 0; private isRestarting: boolean = false; + private processExitReason: string | undefined = undefined; private readonly configureRetry = new ConfigureRetryState(); constructor( @@ -279,6 +363,8 @@ class NativePythonFinderImpl implements NativePythonFinder { this.outputChannel.warn(`[pet] Resolve request ${reason}, killing process for restart`); this.killProcess(); this.processExited = true; + this.processExitReason = + ex instanceof rpc.ConnectionError ? 'rpc_connection_error' : 'rpc_resolve_timeout'; } throw ex; } @@ -339,6 +425,7 @@ class NativePythonFinderImpl implements NativePythonFinder { this.isRestarting = true; this.restartAttempts++; const attempt = this.restartAttempts; + const triggerReason = this.processExitReason ?? (this.startFailed ? 'start_failed' : 'unknown'); const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, this.restartAttempts - 1); this.outputChannel.warn( @@ -361,6 +448,7 @@ class NativePythonFinderImpl implements NativePythonFinder { // Reset state flags this.processExited = false; this.startFailed = false; + this.processExitReason = undefined; this.lastConfiguration = undefined; // Force reconfiguration this.configureRetry.reset(); @@ -368,7 +456,11 @@ class NativePythonFinderImpl implements NativePythonFinder { this.connection = this.start(); this.outputChannel.info('[pet] Python Environment Tools restarted successfully'); - sendTelemetryEvent(EventNames.PET_PROCESS_RESTART, sw.elapsedTime, { attempt, result: 'success' }); + sendTelemetryEvent(EventNames.PET_PROCESS_RESTART, sw.elapsedTime, { + attempt, + result: 'success', + triggerReason, + }); // Reset restart attempts on successful start (process didn't immediately fail) // We'll reset this only after a successful request completes @@ -376,7 +468,7 @@ class NativePythonFinderImpl implements NativePythonFinder { sendTelemetryEvent( EventNames.PET_PROCESS_RESTART, sw.elapsedTime, - { attempt, result: 'error', errorType: classifyError(ex) }, + { attempt, result: 'error', errorType: classifyError(ex), triggerReason }, ex instanceof Error ? ex : undefined, ); this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', ex); @@ -519,6 +611,10 @@ class NativePythonFinderImpl implements NativePythonFinder { // Handle process exit - mark as exited so pending requests fail fast this.proc.on('exit', (code, signal) => { this.processExited = true; + // Preserve a more-specific reason (e.g. rpc_*) if one was already recorded before the kill. + if (this.processExitReason === undefined) { + this.processExitReason = `process_exit:${code ?? 'null'}:${signal ?? 'none'}`; + } if (code !== 0) { this.outputChannel.error( `[pet] Python Environment Tools exited unexpectedly with code ${code}, signal ${signal}`, @@ -529,6 +625,9 @@ class NativePythonFinderImpl implements NativePythonFinder { // Handle process errors (e.g., ENOENT if executable not found) this.proc.on('error', (err) => { this.processExited = true; + if (this.processExitReason === undefined) { + this.processExitReason = 'process_error'; + } this.outputChannel.error('[pet] Process error:', err); }); @@ -626,6 +725,8 @@ class NativePythonFinderImpl implements NativePythonFinder { // Kill and restart for retry this.killProcess(); this.processExited = true; + this.processExitReason = + ex instanceof rpc.ConnectionError ? 'rpc_connection_error' : 'rpc_refresh_timeout'; continue; } // Final attempt failed @@ -658,6 +759,7 @@ class NativePythonFinderImpl implements NativePythonFinder { const nativeInfo: NativeInfo[] = []; const sw = new StopWatch(); let unresolvedCount = 0; + let refreshPerf: RefreshPerformance | undefined; try { await this.configure(); const refreshOptions = this.getRefreshOptions(options); @@ -693,6 +795,11 @@ class NativePythonFinderImpl implements NativePythonFinder { this.outputChannel.info(`Discovered manager: (${data.tool}) ${data.executable}`); nativeInfo.push(data); }), + this.connection.onNotification('telemetry', (notification: PetTelemetryNotification) => { + if (notification?.event === 'RefreshPerformance' && notification.data?.refreshPerformance) { + refreshPerf = notification.data.refreshPerformance; + } + }), ); await sendRequestWithTimeout<{ duration: number }>( this.connection, @@ -708,14 +815,33 @@ class NativePythonFinderImpl implements NativePythonFinder { this.outputChannel.info(`[pet] Refresh succeeded on retry attempt ${attempt + 1}`); } - sendTelemetryEvent(EventNames.PET_REFRESH, sw.elapsedTime, { - result: 'success', - envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length, - unresolvedCount, - workspaceDirCount, - searchPathCount, - attempt, - }); + sendTelemetryEvent( + EventNames.PET_REFRESH, + { + duration: sw.elapsedTime, + ...(refreshPerf?.breakdown['Locators'] !== undefined && { + breakdownLocators: refreshPerf.breakdown['Locators'], + }), + ...(refreshPerf?.breakdown['Path'] !== undefined && { + breakdownPathEnv: refreshPerf.breakdown['Path'], + }), + ...(refreshPerf?.breakdown['GlobalVirtualEnvs'] !== undefined && { + breakdownGlobalVirtualEnvs: refreshPerf.breakdown['GlobalVirtualEnvs'], + }), + ...(refreshPerf?.breakdown['Workspaces'] !== undefined && { + breakdownWorkspaces: refreshPerf.breakdown['Workspaces'], + }), + }, + { + result: 'success', + envCount: nativeInfo.filter((e) => isNativeEnvInfo(e)).length, + unresolvedCount, + workspaceDirCount, + searchPathCount, + attempt, + locatorsJson: refreshPerf ? JSON.stringify(refreshPerf.locators) : undefined, + }, + ); } catch (ex) { const errorType = classifyError(ex); sendTelemetryEvent( @@ -737,6 +863,8 @@ class NativePythonFinderImpl implements NativePythonFinder { this.outputChannel.warn(`[pet] PET process ${reason}, killing for restart`); this.killProcess(); this.processExited = true; + this.processExitReason = + ex instanceof rpc.ConnectionError ? 'rpc_connection_error' : 'rpc_refresh_timeout'; } this.outputChannel.error('[pet] Error refreshing', ex); throw ex; @@ -806,6 +934,7 @@ class NativePythonFinderImpl implements NativePythonFinder { ); this.killProcess(); this.processExited = true; + this.processExitReason = 'rpc_configure_timeout'; } else { this.outputChannel.warn( `[pet] Configure request timed out (attempt ${this.configureRetry.timeoutCount}/${MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL}), ` +