diff --git a/clis/twitter/article.js b/clis/twitter/article.js index ddd135df8..b19b4aa98 100644 --- a/clis/twitter/article.js +++ b/clis/twitter/article.js @@ -1,6 +1,6 @@ import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { cli, Strategy } from '@jackwener/opencli/registry'; -import { resolveTwitterQueryId } from './shared.js'; +import { resolveTwitterQueryId, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN } from './utils.js'; const TWEET_RESULT_BY_REST_ID_QUERY_ID = '7xflPyRiUxGVbJd4uWmbfg'; cli({ @@ -96,7 +96,7 @@ cli({ + '&fieldToggles=' + encodeURIComponent(fieldToggles); const resp = await fetch(url, {headers, credentials: 'include'}); - if (!resp.ok) return {error: 'HTTP ' + resp.status, hint: 'Tweet may not exist or queryId expired'}; + if (!resp.ok) return {httpStatus: resp.status}; const d = await resp.json(); const result = d.data?.tweetResult?.result; @@ -159,6 +159,9 @@ cli({ }]; } `); + if (result?.httpStatus) { + throw new CommandExecutionError(describeTwitterApiError('TweetResultByRestId', result.httpStatus)); + } if (result?.error) { throw new CommandExecutionError(result.error + (result.hint ? ` (${result.hint})` : '')); } diff --git a/clis/twitter/bookmark-folder.js b/clis/twitter/bookmark-folder.js index ae2105011..fc466e685 100644 --- a/clis/twitter/bookmark-folder.js +++ b/clis/twitter/bookmark-folder.js @@ -1,7 +1,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; -import { extractMedia, resolveTwitterQueryId } from './shared.js'; +import { extractMedia, resolveTwitterQueryId, describeTwitterApiError } from './shared.js'; // Companion to bookmark-folders.js: reads tweets inside a single folder. 
// X exposes folder contents through a separate timeline operation @@ -169,7 +169,7 @@ cli({ }`); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch folder ${folderId}. queryId may have expired, or the folder may not exist.`); + throw new CommandExecutionError(describeTwitterApiError('BookmarkFolderTimeline', data.error, `folder=${folderId}`)); break; } const { tweets, nextCursor } = parseBookmarkFolderTimeline(data, seen); diff --git a/clis/twitter/bookmark-folders.js b/clis/twitter/bookmark-folders.js index ba1d0f35a..ad0a3b536 100644 --- a/clis/twitter/bookmark-folders.js +++ b/clis/twitter/bookmark-folders.js @@ -1,7 +1,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { TWITTER_BEARER_TOKEN } from './utils.js'; -import { resolveTwitterQueryId } from './shared.js'; +import { resolveTwitterQueryId, describeTwitterApiError } from './shared.js'; // X surfaces user-created bookmark folders through a GraphQL slice query. // We mirror the patterns used in bookmarks.js / lists.js: a literal @@ -101,7 +101,7 @@ cli({ return r.ok ? await r.json() : { error: r.status }; }`); if (data?.error) { - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch bookmark folders. 
queryId may have expired, or your account may not have folder access.`); + throw new CommandExecutionError(describeTwitterApiError('bookmarkFoldersSlice', data.error, 'account may not have folder access')); } const seen = new Set(); return parseBookmarkFolders(data, seen); diff --git a/clis/twitter/bookmarks.js b/clis/twitter/bookmarks.js index 770f48164..17fafcc18 100644 --- a/clis/twitter/bookmarks.js +++ b/clis/twitter/bookmarks.js @@ -1,6 +1,6 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; -import { extractMedia } from './shared.js'; +import { extractMedia, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; const BOOKMARKS_QUERY_ID = 'Fy0QMy4q_aZCpkO0PnyLYw'; const MAX_PAGINATION_PAGES = 100; @@ -162,7 +162,7 @@ cli({ }`); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch bookmarks. 
queryId may have expired.`); + throw new CommandExecutionError(describeTwitterApiError('Bookmarks', data.error)); break; } const { tweets, nextCursor } = parseBookmarks(data, seen); diff --git a/clis/twitter/device-follow.js b/clis/twitter/device-follow.js index 42ad771f6..2ac15800e 100644 --- a/clis/twitter/device-follow.js +++ b/clis/twitter/device-follow.js @@ -11,6 +11,7 @@ import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; import { cli, Strategy } from '@jackwener/opencli/registry'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; +import { describeTwitterApiError } from './shared.js'; const DEVICE_FOLLOW_PATH = '/i/api/2/notifications/device_follow.json'; const MAX_LIMIT = 200; @@ -165,7 +166,7 @@ cli({ if (data.error === 401 || data.error === 403) { throw new AuthRequiredError('x.com', `Twitter device-follow returned HTTP ${data.error}`); } - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch device-follow notification stream.`); + throw new CommandExecutionError(describeTwitterApiError('device_follow', data.error)); } const parsed = parseDeviceFollow(data, new Set()); if (!parsed) { diff --git a/clis/twitter/following.js b/clis/twitter/following.js index 79ab660a7..0f2dc135c 100644 --- a/clis/twitter/following.js +++ b/clis/twitter/following.js @@ -1,6 +1,6 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; -import { looksLikePrivateTwitterTimeline, normalizeTwitterScreenName, resolveTwitterQueryId, sanitizeQueryId, unwrapBrowserResult } from './shared.js'; +import { looksLikePrivateTwitterTimeline, normalizeTwitterScreenName, resolveTwitterQueryId, sanitizeQueryId, unwrapBrowserResult, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN } from './utils.js'; const FOLLOWING_QUERY_ID = 
'F42cDX8PDFxkbjjq6JrM2w'; @@ -238,7 +238,7 @@ cli({ if (data?.error) { if (data.error === 401 || data.error === 403) throw new AuthRequiredError('x.com', `Twitter following request failed (HTTP ${data.error})`); - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch following list. queryId may have expired.`); + throw new CommandExecutionError(describeTwitterApiError('Following', data.error)); } lastRawResponse = data; const { users, nextCursor } = parseFollowing(data); diff --git a/clis/twitter/likes.js b/clis/twitter/likes.js index 1a3121cad..2f8c42432 100644 --- a/clis/twitter/likes.js +++ b/clis/twitter/likes.js @@ -1,6 +1,6 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; -import { looksLikePrivateTwitterTimeline, normalizeTwitterScreenName, resolveTwitterQueryId, sanitizeQueryId, extractMedia, unwrapBrowserResult } from './shared.js'; +import { looksLikePrivateTwitterTimeline, normalizeTwitterScreenName, resolveTwitterQueryId, sanitizeQueryId, extractMedia, unwrapBrowserResult, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; const LIKES_QUERY_ID = 'CDWHmpZeSdIJ3HGeRbNm0w'; const USER_BY_SCREEN_NAME_QUERY_ID = 'IGgvgiOx4QZndDHuD3x9TQ'; @@ -213,7 +213,7 @@ cli({ }`)); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch likes. 
queryId may have expired.`); + throw new CommandExecutionError(describeTwitterApiError('Likes', data.error)); break; } lastRawResponse = data; diff --git a/clis/twitter/list-tweets.js b/clis/twitter/list-tweets.js index 7485d851a..906454333 100644 --- a/clis/twitter/list-tweets.js +++ b/clis/twitter/list-tweets.js @@ -1,7 +1,8 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; -import { extractMedia, extractCard, extractQuotedTweet } from './shared.js'; +import { BROWSER_JSON_SNIFF_FN, throwIfLoginWall } from '@jackwener/opencli/utils'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; +import { extractCard, extractQuotedTweet, extractMedia, describeTwitterApiError } from './shared.js'; const LIST_TWEETS_QUERY_ID = 'RlZzktZY_9wJynoepm8ZsA'; const OPERATION_NAME = 'ListLatestTweetsTimeline'; @@ -177,13 +178,13 @@ cli({ for (let i = 0; i < MAX_PAGINATION_PAGES && allTweets.length < limit; i++) { const fetchCount = Math.min(100, limit - allTweets.length + 10); const apiUrl = buildUrl(queryId, listId, fetchCount, cursor); - const data = await page.evaluate(`async () => { - const r = await fetch(${JSON.stringify(apiUrl)}, { headers: ${headers}, credentials: 'include' }); - return r.ok ? await r.json() : { error: r.status }; - }`); + const data = throwIfLoginWall(await page.evaluate(`async () => { + ${BROWSER_JSON_SNIFF_FN} + return await fetchJsonOrLoginWall(${JSON.stringify(apiUrl)}, { headers: ${headers}, credentials: 'include' }); + }`), { url: apiUrl }); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch list timeline. 
queryId may have expired or list may be private.`); + throw new CommandExecutionError(describeTwitterApiError('ListLatestTweetsTimeline', data.error, 'list may be private')); break; } const { tweets, nextCursor } = parseListTimeline(data, seen); diff --git a/clis/twitter/lists.js b/clis/twitter/lists.js index fa6b30f50..e16143cb8 100644 --- a/clis/twitter/lists.js +++ b/clis/twitter/lists.js @@ -1,6 +1,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; import { TWITTER_BEARER_TOKEN } from './utils.js'; +import { describeTwitterApiError } from './shared.js'; const LISTS_QUERY_ID = '78UbkyXwXBD98IgUWXOy9g'; const OPERATION_NAME = 'ListsManagementPageTimeline'; @@ -162,7 +163,7 @@ export const command = cli({ return r.ok ? await r.json() : { error: r.status }; }`); if (data?.error) { - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch lists. queryId may have expired.`); + throw new CommandExecutionError(describeTwitterApiError('ListsManagementPageTimeline', data.error)); } const seen = new Set(); if (!getListsManagementInstructions(data)) { diff --git a/clis/twitter/profile.js b/clis/twitter/profile.js index 93b6c6a12..95d8299c9 100644 --- a/clis/twitter/profile.js +++ b/clis/twitter/profile.js @@ -1,6 +1,6 @@ import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; import { cli, Strategy } from '@jackwener/opencli/registry'; -import { normalizeTwitterScreenName, resolveTwitterQueryId, unwrapBrowserResult } from './shared.js'; +import { describeTwitterApiError, normalizeTwitterScreenName, resolveTwitterQueryId, unwrapBrowserResult } from './shared.js'; import { TWITTER_BEARER_TOKEN } from './utils.js'; const USER_BY_SCREEN_NAME_QUERY_ID = 'IGgvgiOx4QZndDHuD3x9TQ'; @@ -132,6 +132,7 @@ cli({ return { ok: false, auth: resp.status === 401 || resp.status === 403, + httpStatus: 
resp.status, error: 'HTTP ' + resp.status, hint: 'User may not exist, auth may be required, or queryId expired' }; @@ -152,7 +153,12 @@ cli({ throw new CommandExecutionError('Twitter profile response payload is malformed'); } if (!rawResult.ok) { - const message = rawResult.error + (rawResult.hint ? ` (${rawResult.hint})` : ''); + // For HTTP errors, use fork's rich code mapping (429/401/403/404/5xx differentiation + // from describeTwitterApiError); fall back to the plain message for non-HTTP failures + // (fetch threw, JSON parse failed, payload malformed). + const message = typeof rawResult.httpStatus === 'number' + ? describeTwitterApiError('UserByScreenName', rawResult.httpStatus, rawResult.hint) + : rawResult.error + (rawResult.hint ? ` (${rawResult.hint})` : ''); if (rawResult.auth) { throw new AuthRequiredError('x.com', message); } diff --git a/clis/twitter/search.js b/clis/twitter/search.js index 61127f367..979a7fbda 100644 --- a/clis/twitter/search.js +++ b/clis/twitter/search.js @@ -1,6 +1,6 @@ import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { cli, Strategy } from '@jackwener/opencli/registry'; -import { extractMedia, extractCard, extractQuotedTweet, normalizeTwitterGraphqlPayload, resolveTwitterOperationMetadata } from './shared.js'; +import { extractMedia, extractCard, extractQuotedTweet, normalizeTwitterGraphqlPayload, resolveTwitterOperationMetadata, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; // ── Public-search operator surface ───────────────────────────────────── @@ -44,7 +44,7 @@ const PRODUCT_TO_GRAPHQL_PRODUCT = Object.freeze({ const MAX_PAGINATION_PAGES = 100; const SEARCH_TIMELINE_OPERATION = { - queryId: 'VhUd6vHVmLBcw0uX-6jMLA', + queryId: 'Yw6L66Pw54NHKuq4Dp7b4Q', features: { rweb_video_screen_enabled: true, rweb_cashtags_enabled: true, @@ -318,7 +318,7 @@ cli({ return r.ok ? 
await r.json() : { error: r.status }; }`)); if (data?.error) { - if (results.length === 0) throw new CommandExecutionError(`HTTP ${data.error}: SearchTimeline fetch failed — queryId may have expired`); + if (results.length === 0) throw new CommandExecutionError(describeTwitterApiError('SearchTimeline', data.error)); break; } const { rows, nextCursor } = parseSearchTimeline(data, seen); diff --git a/clis/twitter/search.test.js b/clis/twitter/search.test.js index b2805325e..218cb8fb1 100644 --- a/clis/twitter/search.test.js +++ b/clis/twitter/search.test.js @@ -4,12 +4,22 @@ import { __test__ } from './search.js'; const { buildSearchQuery, resolveSearchFParam, resolveSearchProduct, buildSearchTimelineRequest, parseSearchTimeline, HAS_CHOICES, EXCLUDE_CHOICES, PRODUCT_CHOICES, EXCLUDE_TO_OPERATOR, PRODUCT_TO_F_PARAM, FROM_USER_PATTERN } = __test__; describe('twitter search command', () => { + // Mocked SearchTimeline operation metadata. The dynamic resolver + // (resolveTwitterOperationMetadata) is exercised via page.evaluate's first + // call. Returning a full {queryId, features, fieldToggles} validates the + // happy path where dynamic resolution (GitHub lookup or bundle scan) succeeds — not the stale + // hardcoded path that previously masked the regex bug. 
+ const DYNAMIC_OP = { + queryId: 'DynamicSearchQid42', + features: { dynamic_test_feature: true }, + fieldToggles: { dynamic_test_toggle: true }, + }; function makeSearchPage(data) { return { getCookies: vi.fn().mockResolvedValue([{ name: 'ct0', value: 'csrf' }]), goto: vi.fn().mockResolvedValue(undefined), evaluate: vi.fn() - .mockResolvedValueOnce(null) // resolveTwitterQueryId fallback + .mockResolvedValueOnce(DYNAMIC_OP) // resolveTwitterOperationMetadata dynamic result .mockResolvedValueOnce(data), }; } @@ -89,6 +99,11 @@ describe('twitter search command', () => { expect(searchFetch).toContain('/SearchTimeline'); expect(searchFetch).toContain("method: 'POST'"); expect(searchFetch).toContain('\\"rawQuery\\":\\"from:alice\\"'); + // Regression guard: the dynamic queryId from resolveTwitterOperationMetadata + // must propagate to the actual GraphQL URL. Previously a bug in the bundle + // parser would return a wrong queryId silently, Twitter would 4xx, and + // search.js raised "queryId may have expired". 
+ expect(searchFetch).toContain('/DynamicSearchQid42/SearchTimeline'); }); it('uses the requested GraphQL product', async () => { diff --git a/clis/twitter/shared.js b/clis/twitter/shared.js index 4f2a7eef9..df945f6f5 100644 --- a/clis/twitter/shared.js +++ b/clis/twitter/shared.js @@ -134,12 +134,21 @@ function keysToFlags(keys) { return Object.fromEntries(keys.filter((key) => typeof key === 'string' && key).map((key) => [key, true])); } +export function normalizeTwitterOperationFlags(value) { + if (Array.isArray(value)) return keysToFlags(value); + if (!value || typeof value !== 'object') return {}; + return Object.fromEntries( + Object.entries(value) + .filter(([key, flag]) => typeof key === 'string' && key && typeof flag === 'boolean'), + ); +} + function normalizeOperationFallback(fallback) { if (typeof fallback === 'string') return { queryId: fallback, features: {}, fieldToggles: {} }; return { queryId: fallback?.queryId || null, - features: fallback?.features || {}, - fieldToggles: fallback?.fieldToggles || {}, + features: normalizeTwitterOperationFlags(fallback?.features), + fieldToggles: normalizeTwitterOperationFlags(fallback?.fieldToggles), }; } @@ -192,41 +201,100 @@ export function sanitizeTwitterOperationMetadata(resolved, fallback) { // surfacing a misleading "queryId expired" error. return { queryId: sanitizeQueryId(value?.queryId, normalizedFallback.queryId), - features: value?.features - && typeof value.features === 'object' - && Object.keys(value.features).length > 0 - ? value.features + features: Object.keys(normalizeTwitterOperationFlags(value?.features)).length > 0 + ? normalizeTwitterOperationFlags(value.features) : normalizedFallback.features, - fieldToggles: value?.fieldToggles - && typeof value.fieldToggles === 'object' - && Object.keys(value.fieldToggles).length > 0 - ? value.fieldToggles + fieldToggles: Object.keys(normalizeTwitterOperationFlags(value?.fieldToggles)).length > 0 + ? 
normalizeTwitterOperationFlags(value.fieldToggles) : normalizedFallback.fieldToggles, }; } +// Pure helper extracted for unit testing. Used both directly in tests and +// serialized into page.evaluate() below so the same logic runs in-browser. +// +// Why two regexes with [^}] separator instead of cutting a snippet around +// the operationName marker: +// The old approach (lastIndexOf 'e.exports=' / indexOf '}}}') was prone to +// cross-module pollution. In a minified bundle 'e.exports=' is dense, and +// the snippet often spanned multiple operation modules. snippet.match(/queryId/) +// would then return the FIRST queryId in the snippet — frequently belonging +// to a different operation — and Twitter would reject it as expired. +// Anchoring queryId immediately adjacent to operationName (≤400 chars, +// non-} characters only) guarantees the queryId belongs to this operation. +export function parseOperationFromBundleText(text, operationName) { + if (!text || !operationName) return null; + const esc = operationName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const reA = new RegExp(`queryId:"([A-Za-z0-9_-]+)"[^}]{0,400}operationName:"${esc}"`); + const reB = new RegExp(`operationName:"${esc}"[^}]{0,400}queryId:"([A-Za-z0-9_-]+)"`); + let queryId = null; + let matchIndex = -1; + const mA = text.match(reA); + if (mA && typeof mA.index === 'number') { + queryId = mA[1]; + matchIndex = mA.index; + } else { + const mB = text.match(reB); + if (mB && typeof mB.index === 'number') { + queryId = mB[1]; + matchIndex = mB.index; + } + } + if (!queryId) return null; + const winStart = Math.max(0, matchIndex - 500); + const winEnd = Math.min(text.length, matchIndex + 1500); + const win = text.slice(winStart, winEnd); + const quotedKeys = (source) => source + ? 
Array.from(source.matchAll(/"([^"]+)"/g)).map((m) => m[1]) + : []; + const flags = (keys) => Object.fromEntries( + (keys || []).filter((k) => typeof k === 'string' && k).map((k) => [k, true]), + ); + return { + queryId, + features: flags(quotedKeys(win.match(/featureSwitches:\[([^\]]*)\]/)?.[1])), + fieldToggles: flags(quotedKeys(win.match(/fieldToggles:\[([^\]]*)\]/)?.[1])), + }; +} + export async function resolveTwitterOperationMetadata(page, operationName, fallback) { + const parserSource = parseOperationFromBundleText.toString(); + // Order: GitHub placeholder.json FIRST (more reliable — fa0311/twitter-openapi + // tracks Twitter's queryId rotation), bundle scan SECOND as a fallback when the GitHub lookup fails. + // The previous order (bundle-first) silently returned wrong queryIds from + // cross-module snippet pollution and never reached the GitHub path. const resolved = await page.evaluate(`async () => { const operationName = ${JSON.stringify(operationName)}; - const keysToFlags = (keys) => Object.fromEntries((keys || []).map((key) => [key, true])); - const quotedKeys = (source) => source - ? Array.from(source.matchAll(/"([^"]+)"/g)).map((match) => match[1]) - : []; - const parseOperation = (text) => { - const marker = 'operationName:"' + operationName + '"'; - const index = text.indexOf(marker); - if (index < 0) return null; - const start = Math.max(0, text.lastIndexOf('e.exports=', index)); - const endMarker = text.indexOf('}}}', index); - const snippet = text.slice(start, endMarker > index ? 
endMarker + 3 : index + 2500); - const queryId = snippet.match(/queryId:"([A-Za-z0-9_-]+)"/)?.[1] || null; - if (!queryId) return null; - return { - queryId, - features: keysToFlags(quotedKeys(snippet.match(/featureSwitches:\\[([^\\]]*)\\]/)?.[1])), - fieldToggles: keysToFlags(quotedKeys(snippet.match(/fieldToggles:\\[([^\\]]*)\\]/)?.[1])), - }; + const keysToFlags = (keys) => Object.fromEntries((keys || []).filter((k) => typeof k === 'string' && k).map((key) => [key, true])); + const normalizeFlags = (value) => { + if (Array.isArray(value)) return keysToFlags(value); + if (!value || typeof value !== 'object') return {}; + return Object.fromEntries(Object.entries(value).filter(([key, flag]) => typeof key === 'string' && key && typeof flag === 'boolean')); }; + const parseOperationFromBundleText = ${parserSource}; + + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 5000); + try { + const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json', { signal: controller.signal }); + clearTimeout(timeout); + if (ghResp.ok) { + const data = await ghResp.json(); + const entry = data && data[operationName]; + if (entry && entry.queryId) { + return { + queryId: entry.queryId, + features: normalizeFlags(entry.features ?? 
entry.featureSwitches), + fieldToggles: normalizeFlags(entry.fieldToggles), + }; + } + } + } catch { + clearTimeout(timeout); + } + } catch {} + try { const scripts = Array.from(document.scripts) .map(s => s.src) @@ -235,33 +303,18 @@ export async function resolveTwitterOperationMetadata(page, operationName, fallb .map(r => r.name) .filter(r => r.includes('client-web') && r.endsWith('.js'))); const uniqueScripts = Array.from(new Set(scripts)); - for (const scriptUrl of uniqueScripts.slice(-30)) { + const head = uniqueScripts.slice(0, 15); + const tail = uniqueScripts.slice(-15); + const candidates = Array.from(new Set([...head, ...tail])); + for (const scriptUrl of candidates) { try { const text = await (await fetch(scriptUrl)).text(); - const operation = parseOperation(text); + const operation = parseOperationFromBundleText(text, operationName); if (operation) return operation; } catch {} } } catch {} - const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 5000); - try { - const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json', { signal: controller.signal }); - clearTimeout(timeout); - if (ghResp.ok) { - const data = await ghResp.json(); - const entry = data?.[operationName]; - if (entry && entry.queryId) { - return { - queryId: entry.queryId, - features: keysToFlags(entry.featureSwitches), - fieldToggles: keysToFlags(entry.fieldToggles), - }; - } - } - } catch { - clearTimeout(timeout); - } + return null; }`); return sanitizeTwitterOperationMetadata(resolved, fallback); @@ -394,7 +447,8 @@ export function extractQuotedTweet(tweet) { const q = tweet?.quoted_status_result?.result ?? tweet?.legacy?.quoted_status_result?.result; // `result` can be a tombstone (`__typename: 'TweetTombstone'`) or - // `'TweetUnavailable'` when the quoted tweet was deleted / privacy-restricted. 
+ // `'TweetUnavailable'` when the quoted tweet was deleted / privacy-restricted — + // it has no `legacy`, so the downstream null-check covers both cases. if (!q) return null; // Nested `tweet` wrapper appears on TweetWithVisibilityResults — same // shim that callers already do at the top level (`tw.tweet || tw`). @@ -440,8 +494,46 @@ export function extractQuotedTweet(tweet) { return out; } +/** + * Translate a non-2xx Twitter API response into a message that distinguishes + * the actual HTTP failure mode, so callers (scripts / scrapers / pipelines) + * can choose retry / cooldown / re-auth / drop without misreading "queryId + * expired" as the universal cause. + * + * @param {string} operation - GraphQL operationName or REST endpoint label + * (e.g. 'SearchTimeline', 'TweetDetail', + * 'device_follow'); used in the error prefix. + * @param {number|string} status - HTTP status code from page.evaluate fetch + * (e.g. r.status); coerced to Number for classification, echoed as-is in the prefix. + * @param {string} [extraHint] - Optional adapter-specific hint appended in parentheses after + * the generic explanation (e.g. "list may be + * private", "folder may not exist"). + * @returns {string} Message intended for `new CommandExecutionError(...)`. 
+ */ +export function describeTwitterApiError(operation, status, extraHint) { + const code = Number(status); + const prefix = `HTTP ${status}: ${operation} fetch failed`; + let suffix; + if (code === 429) { + suffix = 'rate-limited by Twitter (session quota); retry after cooldown (typically 15-30 min)'; + } else if (code === 401) { + suffix = 'auth failed (cookie expired or invalidated); re-login required'; + } else if (code === 403) { + suffix = 'forbidden (cookie lacks scope, or resource is private)'; + } else if (code === 404) { + suffix = 'resource not found (deleted, suspended, or private)'; + } else if (code >= 500 && code < 600) { + suffix = 'Twitter server error; retry later'; + } else { + suffix = 'possibly queryId expired, schema change, or transient'; + } + if (extraHint) suffix = `${suffix} (${extraHint})`; + return `${prefix} — ${suffix}`; +} + export const __test__ = { sanitizeQueryId, + normalizeTwitterOperationFlags, sanitizeTwitterOperationMetadata, unwrapBrowserResult, normalizeTwitterGraphqlPayload, @@ -452,4 +544,6 @@ export const __test__ = { parseTweetUrl, buildTwitterArticleScopeSource, looksLikePrivateTwitterTimeline, + parseOperationFromBundleText, + describeTwitterApiError, }; diff --git a/clis/twitter/shared.test.js b/clis/twitter/shared.test.js index e2461ce21..782bf3020 100644 --- a/clis/twitter/shared.test.js +++ b/clis/twitter/shared.test.js @@ -3,7 +3,21 @@ import { JSDOM } from 'jsdom'; import { __test__ } from './shared.js'; import { ArgumentError } from '@jackwener/opencli/errors'; -const { extractMedia, extractCard, extractQuotedTweet, parseTweetUrl, buildTwitterArticleScopeSource, unwrapBrowserResult, normalizeTwitterGraphqlPayload, normalizeTwitterScreenName, sanitizeTwitterOperationMetadata, looksLikePrivateTwitterTimeline } = __test__; +const { + extractMedia, + extractCard, + extractQuotedTweet, + parseTweetUrl, + buildTwitterArticleScopeSource, + unwrapBrowserResult, + normalizeTwitterGraphqlPayload, + 
normalizeTwitterScreenName, + normalizeTwitterOperationFlags, + sanitizeTwitterOperationMetadata, + looksLikePrivateTwitterTimeline, + parseOperationFromBundleText, + describeTwitterApiError, +} = __test__; function makeCardTweet({ name, bindings, expandedUrl, urls }) { const tweet = { @@ -48,12 +62,6 @@ describe('twitter browser result helpers', () => { }); it('falls back to baked features / fieldToggles when the bundle parser returns empty maps', () => { - // Regression guard: resolveTwitterOperationMetadata's bundle parser can - // find a queryId but miss `featureSwitches:[...]` (e.g. minification - // change, or the 2500-char snippet window truncating before the array). - // In that case keysToFlags(undefined) returns {}; if sanitize kept the - // empty map, Twitter would receive a request with no features and reply - // 400, surfacing a misleading "queryId expired" error. const result = sanitizeTwitterOperationMetadata({ queryId: 'newQueryId', features: {}, @@ -84,6 +92,48 @@ describe('twitter browser result helpers', () => { expect(result.fieldToggles).toEqual({ fallback_field: true }); }); + it('normalizes operation feature arrays and boolean maps without losing false flags', () => { + expect(normalizeTwitterOperationFlags(['feature_a', 'feature_b'])).toEqual({ + feature_a: true, + feature_b: true, + }); + expect(normalizeTwitterOperationFlags({ + rweb_video_screen_enabled: false, + responsive_web_graphql_timeline_navigation_enabled: true, + ignored_non_boolean: 'true', + })).toEqual({ + rweb_video_screen_enabled: false, + responsive_web_graphql_timeline_navigation_enabled: true, + }); + }); + + it('keeps GitHub placeholder-style features boolean maps instead of falling back', () => { + const result = sanitizeTwitterOperationMetadata({ + queryId: 'newQueryId', + features: { + rweb_video_screen_enabled: false, + responsive_web_graphql_timeline_navigation_enabled: true, + }, + fieldToggles: { + withArticlePlainText: true, + }, + }, { + queryId: 'fallback', + 
features: { fallback_feature: true }, + fieldToggles: { fallback_field: true }, + }); + expect(result).toEqual({ + queryId: 'newQueryId', + features: { + rweb_video_screen_enabled: false, + responsive_web_graphql_timeline_navigation_enabled: true, + }, + fieldToggles: { + withArticlePlainText: true, + }, + }); + }); + it('normalizes GraphQL payloads when the bridge strips the top-level data key', () => { expect(normalizeTwitterGraphqlPayload({ user: { result: {} } })).toEqual({ data: { user: { result: {} } }, @@ -95,6 +145,76 @@ describe('twitter browser result helpers', () => { }); }); +describe('parseOperationFromBundleText (bundle queryId resolver)', () => { + it('extracts queryId + featureSwitches + fieldToggles when queryId precedes operationName', () => { + const bundle = 'something={queryId:"FRESH_ID_AAA",operationName:"SearchTimeline",metadata:{featureSwitches:["feat_one","feat_two"],fieldToggles:["ft_one"]}};other'; + const result = parseOperationFromBundleText(bundle, 'SearchTimeline'); + expect(result).toEqual({ + queryId: 'FRESH_ID_AAA', + features: { feat_one: true, feat_two: true }, + fieldToggles: { ft_one: true }, + }); + }); + + it('extracts queryId when operationName precedes queryId (reverse order)', () => { + const bundle = 'mod={operationName:"SearchTimeline",queryId:"REVERSED_ID_BBB",metadata:{featureSwitches:["x"],fieldToggles:[]}};tail'; + const result = parseOperationFromBundleText(bundle, 'SearchTimeline'); + expect(result?.queryId).toBe('REVERSED_ID_BBB'); + expect(result?.features).toEqual({ x: true }); + }); + + it("does not return another operation's queryId from a neighboring module (cross-module pollution)", () => { + // Regression guard: the previous resolver cut a snippet via lastIndexOf('e.exports=') + // / indexOf('}}}') around the operationName marker, then ran /queryId:"..."/ + // unanchored. That returned the first queryId in the snippet — often + // belonging to a different operation in the same chunk. 
Twitter then + // rejected the request as a stale queryId, surfacing as "queryId expired". + const bundle = [ + 'e.exports={queryId:"OTHER_QID_AAA",operationName:"UserTweets",metadata:{}};', + 'e.exports={queryId:"SEARCH_QID_BBB",operationName:"SearchTimeline",metadata:{featureSwitches:["f1"],fieldToggles:[]}};', + ].join(''); + const result = parseOperationFromBundleText(bundle, 'SearchTimeline'); + expect(result?.queryId).toBe('SEARCH_QID_BBB'); + expect(result?.queryId).not.toBe('OTHER_QID_AAA'); + }); + + it('returns null when operationName is absent from the bundle', () => { + const bundle = 'no operation here, just queryId:"STRAY_QID" floating around'; + expect(parseOperationFromBundleText(bundle, 'SearchTimeline')).toBeNull(); + }); + + it('returns null when queryId is too far from operationName (cross-object)', () => { + // The [^}]{0,400} separator prevents matches that cross object boundaries + // (a `}` between queryId and operationName means they belong to different + // objects/modules). + const bundle = 'e.exports={queryId:"WRONG_QID"};lots_of_other_code={};e.exports={operationName:"SearchTimeline",noQueryIdHere:true};'; + expect(parseOperationFromBundleText(bundle, 'SearchTimeline')).toBeNull(); + }); + + it('falls back to empty features when featureSwitches array is not in the window', () => { + // sanitizeTwitterOperationMetadata then uses the baked fallback features. + const bundle = 'e.exports={queryId:"BARE_QID",operationName:"SearchTimeline"};'; + const result = parseOperationFromBundleText(bundle, 'SearchTimeline'); + expect(result?.queryId).toBe('BARE_QID'); + expect(result?.features).toEqual({}); + expect(result?.fieldToggles).toEqual({}); + }); + + it('escapes regex metacharacters in operationName', () => { + // Defensive — operationName values are controlled by callers but escaping + // protects against future operation names with special chars. 
+ const bundle = 'queryId:"ESCAPED_QID"___operationName:"Foo.Bar"___'; + const result = parseOperationFromBundleText(bundle, 'Foo.Bar'); + expect(result?.queryId).toBe('ESCAPED_QID'); + }); + + it('returns null for empty / invalid input', () => { + expect(parseOperationFromBundleText('', 'SearchTimeline')).toBeNull(); + expect(parseOperationFromBundleText('text', '')).toBeNull(); + expect(parseOperationFromBundleText(null, 'SearchTimeline')).toBeNull(); + }); +}); + describe('twitter normalizeTwitterScreenName', () => { it('accepts exact handles and exact Twitter/X profile URLs', () => { expect(normalizeTwitterScreenName('@viewer')).toBe('viewer'); @@ -792,3 +912,80 @@ describe('looksLikePrivateTwitterTimeline', () => { })).toBe(true); }); }); + +describe('describeTwitterApiError', () => { + it('429 -> rate-limited language so callers cool down instead of treating as queryId expiry', () => { + const msg = describeTwitterApiError('SearchTimeline', 429); + expect(msg).toContain('HTTP 429'); + expect(msg).toContain('SearchTimeline'); + expect(msg).toContain('rate-limited'); + expect(msg).toContain('cooldown'); + expect(msg).not.toContain('queryId'); + }); + + it('401 -> auth-failed language so callers trigger re-login instead of retrying', () => { + const msg = describeTwitterApiError('TweetDetail', 401); + expect(msg).toContain('HTTP 401'); + expect(msg).toContain('auth failed'); + expect(msg).toContain('re-login'); + }); + + it('403 -> forbidden language with cookie-scope hint', () => { + const msg = describeTwitterApiError('Likes', 403); + expect(msg).toContain('HTTP 403'); + expect(msg).toContain('forbidden'); + }); + + it('404 -> not-found language so callers map to empty/missing', () => { + const msg = describeTwitterApiError('UserByScreenName', 404); + expect(msg).toContain('HTTP 404'); + expect(msg).toContain('not found'); + }); + + it('5xx (500) -> server-error language so callers retry later', () => { + const msg = describeTwitterApiError('Bookmarks', 500); 
+ expect(msg).toContain('HTTP 500'); + expect(msg).toContain('server error'); + }); + + it('5xx (503) -> server-error language', () => { + const msg = describeTwitterApiError('UserTweets', 503); + expect(msg).toContain('HTTP 503'); + expect(msg).toContain('server error'); + }); + + it('unknown code -> falls back to queryId/schema-change hint (the original guess, now scoped)', () => { + const msg = describeTwitterApiError('SearchTimeline', 999); + expect(msg).toContain('HTTP 999'); + expect(msg).toContain('queryId'); + expect(msg).toContain('schema change'); + }); + + it('numeric-string status (e.g. "429" from JSON) still routes to rate-limited branch', () => { + const msg = describeTwitterApiError('SearchTimeline', '429'); + expect(msg).toContain('rate-limited'); + }); + + it('appends adapter-specific extraHint in parentheses after the generic suffix', () => { + const msg = describeTwitterApiError('ListLatestTweetsTimeline', 404, 'list may be private'); + expect(msg).toContain('not found'); + expect(msg).toContain('(list may be private)'); + }); + + it('runtime-interpolated extraHint (e.g. folder id) is preserved verbatim', () => { + const msg = describeTwitterApiError('BookmarkFolderTimeline', 404, 'folder=abc123'); + expect(msg).toContain('(folder=abc123)'); + }); + + it('preserves "HTTP :" prefix for backward-compat string matching by downstream pipelines', () => { + // Downstream callers (ml-scout, ad-hoc scripts) may regex-match + // /HTTP (\d+):/ to extract status code. The refactor must keep + // that prefix verbatim so existing log parsers don't break. 
+ const cases = [400, 401, 403, 404, 429, 500, 503, 999]; + for (const code of cases) { + const msg = describeTwitterApiError('SearchTimeline', code); + expect(msg).toMatch(/^HTTP \d+: /); + expect(msg).toContain(`HTTP ${code}:`); + } + }); +}); diff --git a/clis/twitter/thread.js b/clis/twitter/thread.js index 1a4a73272..23a828345 100644 --- a/clis/twitter/thread.js +++ b/clis/twitter/thread.js @@ -1,6 +1,7 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; -import { extractMedia, extractCard, extractQuotedTweet } from './shared.js'; +import { BROWSER_JSON_SNIFF_FN, throwIfLoginWall } from '@jackwener/opencli/utils'; +import { extractMedia, extractCard, extractQuotedTweet, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; // ── Twitter GraphQL constants ────────────────────────────────────────── const TWEET_DETAIL_QUERY_ID = 'nBS-WpgA6ZG0CyNHD517JQ'; @@ -138,14 +139,16 @@ cli({ let cursor = null; for (let i = 0; i < 5; i++) { const apiUrl = buildTweetDetailUrl(tweetId, cursor); - // Browser-side: just fetch + return JSON (3 lines) - const data = await page.evaluate(`async () => { - const r = await fetch("${apiUrl}", { headers: ${headers}, credentials: 'include' }); - return r.ok ? await r.json() : { error: r.status }; - }`); + // Browser-side: fetch + JSON parse with HTML-as-JSON sniffer so a + // login wall / WAF page surfaces as a structured LoginWallError + // instead of `SyntaxError: Unexpected token '<'`. 
+ const data = throwIfLoginWall(await page.evaluate(`async () => { + ${BROWSER_JSON_SNIFF_FN} + return await fetchJsonOrLoginWall("${apiUrl}", { headers: ${headers}, credentials: 'include' }); + }`), { url: apiUrl }); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Tweet not found or queryId expired`); + throw new CommandExecutionError(describeTwitterApiError('TweetDetail', data.error)); break; } // TypeScript-side: type-safe parsing + cursor extraction diff --git a/clis/twitter/timeline.js b/clis/twitter/timeline.js index 773d1575c..21f5cbbff 100644 --- a/clis/twitter/timeline.js +++ b/clis/twitter/timeline.js @@ -1,6 +1,6 @@ import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors'; import { cli, Strategy } from '@jackwener/opencli/registry'; -import { resolveTwitterQueryId, extractMedia, extractCard, extractQuotedTweet } from './shared.js'; +import { resolveTwitterQueryId, extractMedia, extractCard, extractQuotedTweet, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; // ── Twitter GraphQL constants ────────────────────────────────────────── const HOME_TIMELINE_QUERY_ID = 'c-CzHF1LboFilMpsx4ZCrQ'; @@ -191,7 +191,7 @@ cli({ }`); if (data?.error) { if (allTweets.length === 0) - throw new CommandExecutionError(`HTTP ${data.error}: Failed to fetch timeline. 
queryId may have expired.`); + throw new CommandExecutionError(describeTwitterApiError(endpoint, data.error)); break; } const { tweets, nextCursor } = parseHomeTimeline(data, seen); diff --git a/clis/twitter/tweets.js b/clis/twitter/tweets.js index 31d27e7ba..da38d7787 100644 --- a/clis/twitter/tweets.js +++ b/clis/twitter/tweets.js @@ -1,7 +1,6 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; -import { resolveTwitterOperationMetadata, sanitizeQueryId, extractMedia, extractQuotedTweet, normalizeTwitterGraphqlPayload, unwrapBrowserResult } from './shared.js'; -import { normalizeTwitterScreenName } from './shared.js'; +import { resolveTwitterOperationMetadata, sanitizeQueryId, extractMedia, extractQuotedTweet, normalizeTwitterGraphqlPayload, unwrapBrowserResult, normalizeTwitterScreenName, describeTwitterApiError } from './shared.js'; import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js'; const USER_TWEETS_QUERY_ID = 'lrMzG9qPQHpqJdP3AbM-bQ'; @@ -291,7 +290,7 @@ cli({ return r.ok ? await r.json() : { error: r.status }; }`)); if (data?.error) { - if (all.length === 0) throw new CommandExecutionError(`HTTP ${data.error}: UserTweets fetch failed — queryId may have expired`); + if (all.length === 0) throw new CommandExecutionError(describeTwitterApiError('UserTweets', data.error)); break; } const { tweets, nextCursor } = parseUserTweets(data, seen);