From 8a7eb2ea767ec602bc4dcbb659da0300e7696855 Mon Sep 17 00:00:00 2001 From: Jon Brown Date: Mon, 18 May 2026 09:30:22 -0700 Subject: [PATCH] Reduce OpenAI dashboard scrape payload --- .../OpenAIWeb/OpenAIDashboardFetcher.swift | 62 ++++----- .../OpenAIDashboardScrapeScript.swift | 129 +++++++++++++++++- .../OpenAIDashboardScrapeScriptTests.swift | 50 +++++++ 3 files changed, 203 insertions(+), 38 deletions(-) diff --git a/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardFetcher.swift b/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardFetcher.swift index a056b99bb..93caf0634 100644 --- a/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardFetcher.swift +++ b/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardFetcher.swift @@ -106,8 +106,7 @@ public struct OpenAIDashboardFetcher { let codeReviewLimit = OpenAIDashboardParser.parseCodeReviewLimit(bodyText: bodyText) let parsedCreditsRemaining = OpenAIDashboardParser.parseCreditsRemaining(bodyText: bodyText) let creditsRemaining = apiData?.creditsRemaining ?? parsedCreditsRemaining - let parsedAccountPlan = scrape.bodyHTML.flatMap(OpenAIDashboardParser.parsePlanFromHTML) - let accountPlan = parsedAccountPlan ?? apiData?.accountPlan + let accountPlan = scrape.accountPlan ?? apiData?.accountPlan let hasParsedUsageLimits = parsedRateLimits.primary != nil || parsedRateLimits.secondary != nil let hasUsageLimits = rateLimits.primary != nil || rateLimits.secondary != nil let hasDashboardPageData = self.hasReturnableDashboardData( @@ -227,7 +226,6 @@ public struct OpenAIDashboardFetcher { let log = lease.log var lastBody: String? - var lastHTML: String? var lastHref: String? var lastFlags: (loginRequired: Bool, workspacePicker: Bool, cloudflare: Bool)? var codeReviewFirstSeenAt: Date? @@ -240,7 +238,6 @@ public struct OpenAIDashboardFetcher { while Date() < deadline { let scrape = try await self.scrape(webView: webView) lastBody = scrape.bodyText ?? lastBody - lastHTML = scrape.bodyHTML ?? lastHTML if scrape.href != lastHref || lastFlags?.loginRequired != scrape.loginRequired @@ -267,7 +264,13 @@ public struct OpenAIDashboardFetcher { continue } - try Self.throwIfBlockingScrapeState(scrape, debugDumpHTML: debugDumpHTML, logger: log) + if debugDumpHTML, + scrape.loginRequired || scrape.cloudflareInterstitial, + let html = try? await self.fetchDebugHTML(webView: webView) + { + Self.writeDebugArtifacts(html: html, bodyText: scrape.bodyText, logger: log) + } + try Self.throwIfBlockingScrapeState(scrape) let dashboardData = Self.parseDashboardScrape( scrape, @@ -368,7 +371,7 @@ public struct OpenAIDashboardFetcher { try? await Task.sleep(for: .milliseconds(500)) } - if debugDumpHTML, let html = lastHTML { + if debugDumpHTML, let html = try? await self.fetchDebugHTML(webView: webView) { Self.writeDebugArtifacts(html: html, bodyText: lastBody, logger: log) } throw FetchError.noDashboardData(body: lastUsageBreakdownError ?? lastBody ?? "") @@ -506,8 +509,9 @@ public struct OpenAIDashboardFetcher { let cloudflareInterstitial: Bool let href: String? let bodyText: String? - let bodyHTML: String? let signedInEmail: String? + let authStatus: String? + let accountPlan: String? let creditsPurchaseURL: String? let rows: [[String]] let usageBreakdown: [OpenAIDashboardDailyBreakdown] @@ -530,8 +534,9 @@ public struct OpenAIDashboardFetcher { cloudflareInterstitial: false, href: nil, bodyText: nil, - bodyHTML: nil, signedInEmail: nil, + authStatus: nil, + accountPlan: nil, creditsPurchaseURL: nil, rows: [], usageBreakdown: [], @@ -549,7 +554,6 @@ public struct OpenAIDashboardFetcher { let workspacePicker = (dict["workspacePicker"] as? Bool) ?? false let cloudflareInterstitial = (dict["cloudflareInterstitial"] as? Bool) ?? false let rows = (dict["rows"] as? [[String]]) ?? [] - let bodyHTML = dict["bodyHTML"] as? String var usageBreakdown: [OpenAIDashboardDailyBreakdown] = [] let usageBreakdownDebug = dict["usageBreakdownDebug"] as? String @@ -566,18 +570,14 @@ public struct OpenAIDashboardFetcher { } var signedInEmail = dict["signedInEmail"] as? String - if let bodyHTML, - signedInEmail == nil || signedInEmail?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == true - { - signedInEmail = OpenAIDashboardParser.parseSignedInEmailFromClientBootstrap(html: bodyHTML) - } - - if let bodyHTML, let authStatus = OpenAIDashboardParser.parseAuthStatusFromClientBootstrap(html: bodyHTML) { - if authStatus.lowercased() != "logged_in" { - // When logged out, the SPA can render a generic landing shell without obvious auth inputs, - // so treat it as login-required and let the caller retry cookie import. - loginRequired = true - } + signedInEmail = signedInEmail?.trimmingCharacters(in: .whitespacesAndNewlines) + let authStatus = (dict["authStatus"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) + let accountPlan = (dict["accountPlan"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) + + if let authStatus, !authStatus.isEmpty, authStatus.lowercased() != "logged_in" { + // When logged out, the SPA can render a generic landing shell without obvious auth inputs, + // so treat it as login-required and let the caller retry cookie import. + loginRequired = true } return ScrapeResult( @@ -586,8 +586,9 @@ public struct OpenAIDashboardFetcher { cloudflareInterstitial: cloudflareInterstitial, href: dict["href"] as? String, bodyText: dict["bodyText"] as? String, - bodyHTML: bodyHTML, signedInEmail: signedInEmail, + authStatus: authStatus, + accountPlan: accountPlan, creditsPurchaseURL: dict["creditsPurchaseURL"] as? String, rows: rows, usageBreakdown: usageBreakdown, @@ -601,26 +602,21 @@ public struct OpenAIDashboardFetcher { didScrollToCredits: (dict["didScrollToCredits"] as? Bool) ?? false) } - private static func throwIfBlockingScrapeState( - _ scrape: ScrapeResult, - debugDumpHTML: Bool, - logger: (String) -> Void) throws - { + private static func throwIfBlockingScrapeState(_ scrape: ScrapeResult) throws { if scrape.loginRequired { - if debugDumpHTML, let html = scrape.bodyHTML { - self.writeDebugArtifacts(html: html, bodyText: scrape.bodyText, logger: logger) - } throw FetchError.loginRequired } if scrape.cloudflareInterstitial { - if debugDumpHTML, let html = scrape.bodyHTML { - self.writeDebugArtifacts(html: html, bodyText: scrape.bodyText, logger: logger) - } throw FetchError.noDashboardData(body: "Cloudflare challenge detected in WebView.") } } + private func fetchDebugHTML(webView: WKWebView) async throws -> String? { + try await webView.evaluateJavaScript( + "document.documentElement ? String(document.documentElement.outerHTML || '') : ''") as? String + } + private func makeWebView( websiteDataStore: WKWebsiteDataStore, logger: ((String) -> Void)?, diff --git a/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardScrapeScript.swift b/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardScrapeScript.swift index ce1f76037..b45d573c8 100644 --- a/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardScrapeScript.swift +++ b/Sources/CodexBarCore/OpenAIWeb/OpenAIDashboardScrapeScript.swift @@ -175,6 +175,105 @@ let openAIDashboardScrapeScript = """ const normalized = normalizeHref(anchorHref || dataHref || propHref); return normalized && isLikelyCreditsURL(normalized) ? normalized : null; }; + const cleanPlanName = (raw) => String(raw || '') + .replace(/\\b(claude|codex|account|plan)\\b/gi, ' ') + .replace(/_/g, ' ') + .replace(/-/g, ' ') + .replace(/\\s+/g, ' ') + .trim(); + const codexPlanDisplayName = (raw) => { + const trimmed = String(raw || '').trim(); + if (!trimmed) return null; + const lower = trimmed.toLowerCase(); + const exact = { + pro: 'Pro 20x', + prolite: 'Pro 5x', + 'pro_lite': 'Pro 5x', + 'pro-lite': 'Pro 5x', + 'pro lite': 'Pro 5x' + }; + if (exact[lower]) return exact[lower]; + const cleaned = cleanPlanName(trimmed); + if (!cleaned) return trimmed; + if (exact[cleaned.toLowerCase()]) return exact[cleaned.toLowerCase()]; + return cleaned.split(' ') + .filter(Boolean) + .map(word => { + const wordLower = word.toLowerCase(); + if (wordLower === 'cbp' || wordLower === 'k12') return wordLower.toUpperCase(); + if (word === word.toUpperCase() && /[a-z]/i.test(word)) return word; + return word.charAt(0).toUpperCase() + word.slice(1); + }) + .join(' ') || cleaned; + }; + const normalizePlanValue = (value) => { + const trimmed = String(value || '').trim(); + if (!trimmed) return null; + const lower = trimmed.toLowerCase(); + const allowed = [ + 'free', + 'plus', + 'pro', + 'team', + 'enterprise', + 'business', + 'edu', + 'education', + 'gov', + 'premium', + 'essential' + ]; + if (!allowed.some(token => lower.includes(token))) return null; + return codexPlanDisplayName(trimmed) || cleanPlanName(trimmed); + }; + const planCandidate = (key, value) => { + const lower = String(key || '').toLowerCase(); + if (!lower.includes('plan') && !lower.includes('tier') && !lower.includes('subscription')) return null; + if (typeof value === 'string') return normalizePlanValue(value); + if (value && typeof value === 'object' && !Array.isArray(value)) { + return normalizePlanValue(value.name) || + normalizePlanValue(value.displayName) || + normalizePlanValue(value.tier); + } + return null; + }; + const findPlan = (root) => { + if (!root || typeof root !== 'object') return null; + const queue = [root]; + const seenObjects = typeof WeakSet !== 'undefined' ? new WeakSet() : null; + let index = 0; + let seen = 0; + while (index < queue.length && seen < 6000) { + const cur = queue[index++]; + seen++; + if (!cur || typeof cur !== 'object') continue; + if (seenObjects) { + if (seenObjects.has(cur)) continue; + seenObjects.add(cur); + } + if (Array.isArray(cur)) { + for (const v of cur) { + if (v && typeof v === 'object') queue.push(v); + } + continue; + } + for (const [k, v] of Object.entries(cur)) { + const plan = planCandidate(k, v); + if (plan) return plan; + if (v && typeof v === 'object') queue.push(v); + } + } + return null; + }; + const parseJSONScript = (id) => { + try { + const node = document.getElementById(id); + const raw = node && node.textContent ? String(node.textContent) : ''; + return raw ? JSON.parse(raw) : null; + } catch { + return null; + } + }; const pickLikelyPurchaseButton = (buttons) => { if (!buttons || buttons.length === 0) return null; const labeled = buttons.find(btn => { @@ -687,6 +786,8 @@ let openAIDashboardScrapeScript = """ } catch {} let signedInEmail = null; + let authStatus = null; + let accountPlan = null; try { const next = window.__NEXT_DATA__ || null; const props = (next && next.props && next.props.pageProps) ? next.props.pageProps : null; @@ -695,12 +796,29 @@ let openAIDashboardScrapeScript = """ signedInEmail = userEmail || sessionEmail || null; } catch {} + const clientBootstrap = parseJSONScript('client-bootstrap'); + if (clientBootstrap) { + try { + authStatus = typeof clientBootstrap.authStatus === 'string' ? clientBootstrap.authStatus : null; + if (!signedInEmail) { + const session = clientBootstrap.session || null; + const user = (session && session.user) || clientBootstrap.user || null; + const email = user && typeof user.email === 'string' ? user.email : null; + if (email && email.includes('@')) signedInEmail = email; + } + if (!accountPlan) accountPlan = findPlan(clientBootstrap); + } catch {} + } + if (!accountPlan) { + try { + accountPlan = findPlan(window.__NEXT_DATA__ || parseJSONScript('__NEXT_DATA__')); + } catch {} + } + if (!signedInEmail) { try { - const node = document.getElementById('__NEXT_DATA__'); - const raw = node && node.textContent ? String(node.textContent) : ''; - if (raw) { - const obj = JSON.parse(raw); + const obj = parseJSONScript('__NEXT_DATA__'); + if (obj) { const queue = [obj]; let seen = 0; while (queue.length && seen < 2000 && !signedInEmail) { @@ -768,8 +886,9 @@ let openAIDashboardScrapeScript = """ cloudflareInterstitial, href, bodyText, - bodyHTML: document.documentElement ? String(document.documentElement.outerHTML || '') : '', signedInEmail, + authStatus, + accountPlan, creditsPurchaseURL, rows, usageBreakdownJSON, diff --git a/Tests/CodexBarTests/OpenAIDashboardScrapeScriptTests.swift b/Tests/CodexBarTests/OpenAIDashboardScrapeScriptTests.swift index 5ed6b621c..56901c24b 100644 --- a/Tests/CodexBarTests/OpenAIDashboardScrapeScriptTests.swift +++ b/Tests/CodexBarTests/OpenAIDashboardScrapeScriptTests.swift @@ -7,6 +7,23 @@ import WebKit @MainActor @Suite(.serialized) struct OpenAIDashboardScrapeScriptTests { + @Test + func `scraper returns structured account fields without full html`() async throws { + if Self.shouldSkipOnCI() { return } + + let webView = WKWebView(frame: .zero, configuration: WKWebViewConfiguration()) + _ = webView.loadHTMLString(Self.bootstrapAccountHTML, baseURL: nil) + try await Self.waitForFixture(webView, elementID: "account-fixture") + + let any = try await webView.evaluateJavaScript(openAIDashboardScrapeScript) + let dict = try #require(any as? [String: Any]) + + #expect(dict["bodyHTML"] == nil) + #expect(dict["signedInEmail"] as? String == "user@example.com") + #expect(dict["authStatus"] as? String == "logged_in") + #expect(dict["accountPlan"] as? String == "Pro 5x") + } + @Test func `usage breakdown scraper ignores neighboring client charts`() async throws { if Self.shouldSkipOnCI() { return } @@ -74,6 +91,39 @@ struct OpenAIDashboardScrapeScriptTests { } } + private static let bootstrapAccountHTML = """ + + +
Usage limits
+ + + + + """ + private static let multiChartHTML = """