def test_cli_news_json(monkeypatch) -> None:
    """`news --max 5 --json` passes the limit to the client and prints a JSON payload."""
    seen_kwargs = {}
    story = {
        "rank": 1,
        "title": "Major AI story",
        "context": "Trending now · News · 105 posts",
        "post_count": 105,
        "url": "https://x.com/i/trending/123",
    }

    class FakeClient:
        def fetch_explore_news(self, count: int):
            # Record the limit the command forwarded so it can be asserted below.
            seen_kwargs["count"] = count
            return [story]

    def fake_get_client(config=None, quiet=False):
        return FakeClient()

    def fake_load_config():
        return {"fetch": {"count": 50}, "filter": {}, "rateLimit": {}}

    monkeypatch.setattr("twitter_cli.cli._get_client", fake_get_client)
    monkeypatch.setattr("twitter_cli.cli.load_config", fake_load_config)

    result = CliRunner().invoke(cli, ["news", "--max", "5", "--json"])

    assert result.exit_code == 0, f"news failed: {result.output}"
    payload = json.loads(result.output)
    assert payload["ok"] is True
    assert payload["data"][0]["title"] == "Major AI story"
    assert seen_kwargs == {"count": 5}
captured.get("querySource") == "typed_query" + + +class TestFetchExploreNews: + def _make_client(self): + client = TwitterClient.__new__(TwitterClient) + client._auth_token = "tok" + client._ct0 = "ct0" + client._cookie_string = None + client._request_delay = 0 + client._max_retries = 0 + client._retry_base_delay = 0 + client._max_count = 200 + client._client_transaction = None + client._ct_init_attempted = True + return client + + def test_fetch_explore_news_uses_news_timeline_id(self): + client = self._make_client() + calls = [] + + def mock_graphql_get(operation_name, variables, features, field_toggles=None): + calls.append((operation_name, variables)) + if operation_name == "ExplorePage": + return { + "data": { + "explore_page": { + "body": { + "timelines": [ + {"id": "for_you", "timeline": {"id": "for-you-id"}}, + {"id": "news", "timeline": {"id": "news-id"}}, + ] + } + } + } + } + if operation_name == "GenericTimelineById": + assert variables["timelineId"] == "news-id" + return { + "data": { + "timeline": { + "timeline": { + "instructions": [ + { + "type": "TimelineAddEntries", + "entries": [ + { + "entryId": "stories", + "content": { + "entryType": "TimelineTimelineModule", + "items": [ + { + "item": { + "itemContent": { + "__typename": "TimelineTrend", + "is_ai_trend": True, + "name": "Major AI story", + "social_context": { + "text": "Trending now · News · 26K posts" + }, + "trend_metadata": { + "url": { + "url": "twitter://trending/123", + "urlType": "DeepLink", + } + }, + } + } + } + ], + }, + } + ], + } + ] + } + } + } + } + raise AssertionError(operation_name) + + client._graphql_get = mock_graphql_get + + stories = client.fetch_explore_news(count=5) + + assert calls[0][0] == "ExplorePage" + assert calls[1][0] == "GenericTimelineById" + assert stories == [ + { + "rank": 1, + "title": "Major AI story", + "name": "Major AI story", + "category": "", + "context": "Trending now · News · 26K posts", + "query": "Major AI story", + "url": 
def _print_news_table(stories, title="News stories"):
    # type: (List[dict], str) -> None
    """Render story dicts as a four-column table (#, Story, Context, Posts).

    Each story is read with ``.get`` for ``rank``, ``title``, ``context``
    (falling back to ``category``) and ``post_count``. The Posts cell is
    left blank unless ``post_count`` is a positive int. The table is printed
    to the module console, followed by one blank line.
    """
    column_specs = (
        ("#", {"justify": "right", "style": "dim", "width": 4}),
        ("Story", {"style": "bold", "overflow": "fold"}),
        ("Context", {"overflow": "fold"}),
        ("Posts", {"justify": "right"}),
    )
    news_table = Table(title=title, show_lines=False)
    for header, options in column_specs:
        news_table.add_column(header, **options)

    for entry in stories:
        posts = entry.get("post_count")
        if isinstance(posts, int) and posts > 0:
            posts_cell = f"{posts:,}"
        else:
            posts_cell = ""
        blurb = entry.get("context", "") or entry.get("category", "") or ""
        news_table.add_row(
            str(entry.get("rank", "")),
            str(entry.get("title", "")),
            str(blurb),
            posts_cell,
        )

    console.print(news_table)
    console.print()
def _coerce_string(value):
    # type: (Any) -> str
    """Return ``value`` with surrounding whitespace stripped, or "" if it is not a str."""
    if isinstance(value, str):
        return value.strip()
    return ""


def _first_text(*values):
    # type: (*Any) -> str
    """Return the first argument that is a non-blank string (stripped), or ""."""
    for value in values:
        text = _coerce_string(value)
        if text:
            return text
    return ""


# Multipliers for the abbreviated-count suffixes recognised by _extract_post_count.
_POST_COUNT_SUFFIX_MULTIPLIERS = {
    "": 1,
    "K": 1_000,
    "M": 1_000_000,
    "B": 1_000_000_000,
}


def _extract_post_count(text):
    # type: (str) -> Optional[int]
    """Parse a count such as "105 posts", "1,234 tweets" or "1.5K posts".

    Returns the count as an int, or None when ``text`` is not a string,
    contains no "<number>[K|M|B] posts/tweets" phrase, or the number cannot
    be parsed.
    """
    if not isinstance(text, str):
        return None
    # The numeral must start with a digit so stray punctuation before
    # "posts" (e.g. "U.S. posts") is not mistaken for a count.
    match = re.search(r"(\d[\d,.]*)\s*([KMB])?\s+(?:posts?|tweets?)", text, re.IGNORECASE)
    if not match:
        return None
    numeral = match.group(1).replace(",", "")
    try:
        magnitude = float(numeral)
    except ValueError:
        # e.g. "1.2.3" -- not a number we can interpret.
        return None
    multiplier = _POST_COUNT_SUFFIX_MULTIPLIERS[(match.group(2) or "").upper()]
    # Scale before converting to int so "1.5K" becomes 1500; round() absorbs
    # binary float error such as 2.3 * 1_000_000 == 2299999.9999999995.
    return int(round(magnitude * multiplier))


def _twitter_deeplink_to_url(url):
    # type: (Optional[str]) -> Optional[str]
    """Convert a ``twitter://`` deeplink into an ``https://x.com`` URL.

    * http(s) URLs are returned unchanged.
    * ``twitter://trending/<id>`` becomes ``https://x.com/i/trending/<id>``;
      any query string or fragment on the deeplink is dropped.
    * ``twitter://search`` with a ``query`` or ``q`` parameter becomes
      ``https://x.com/search?q=<query>``.
    * A trending/search deeplink without an id/query maps to
      ``https://x.com/explore``.
    * Any other string is returned unchanged; empty or non-string input
      returns None.
    """
    if not isinstance(url, str) or not url:
        return None
    if url.startswith(("https://", "http://")):
        return url
    try:
        parsed = urllib.parse.urlparse(url)
    except ValueError:
        # Malformed URL; hand it back untouched rather than failing.
        return url
    if parsed.scheme != "twitter":
        return url

    target = parsed.netloc.lower()
    if target == "trending":
        # Use the parsed path so "?..." / "#..." never leak into the id.
        trend_id = parsed.path.strip("/").rsplit("/", 1)[-1]
        if trend_id:
            return "https://x.com/i/trending/%s" % urllib.parse.quote(trend_id)
        return "https://x.com/explore"
    if target == "search":
        params = urllib.parse.parse_qs(parsed.query)
        # Accept both parameter names, matching _extract_query_from_url.
        raw_query = _first_text(params.get("query", [""])[0], params.get("q", [""])[0])
        if raw_query:
            return "https://x.com/search?q=%s" % urllib.parse.quote(raw_query)
        return "https://x.com/explore"
    return url


def _extract_query_from_url(url):
    # type: (Optional[str]) -> str
    """Return the decoded ``query`` (preferred) or ``q`` URL parameter, or ""."""
    if not isinstance(url, str) or not url:
        return ""
    try:
        parsed = urllib.parse.urlparse(url)
    except ValueError:
        return ""
    params = urllib.parse.parse_qs(parsed.query)
    return _first_text(params.get("query", [""])[0], params.get("q", [""])[0])


def _extract_trend_id(url):
    # type: (Optional[str]) -> Optional[str]
    """Return the numeric id following ``twitter://trending/`` or ``/i/trending/``, else None."""
    if not isinstance(url, str) or not url:
        return None
    match = re.search(r"(?:twitter://trending/|/i/trending/)(\d+)", url)
    if match:
        return match.group(1)
    return None
def _extract_explore_item_contents(instructions):
    # type: (Any) -> List[Dict[str, Any]]
    """Collect every ``itemContent`` dict found in timeline instructions.

    For each entry of each instruction this picks up
    ``content["itemContent"]`` (a single-item entry) and, for module
    entries, ``item["itemContent"]`` of every element of
    ``content["items"]``, in document order. Instructions, entries or
    content values that are not dicts are skipped, so one malformed
    element does not abort the whole parse.
    """
    items = []  # type: List[Dict[str, Any]]
    for instruction in instructions or []:
        if not isinstance(instruction, dict):
            continue
        for entry in instruction.get("entries") or []:
            if not isinstance(entry, dict):
                continue
            content = entry.get("content") or {}
            if not isinstance(content, dict):
                continue

            item_content = content.get("itemContent")
            if isinstance(item_content, dict):
                items.append(item_content)

            for module_item in content.get("items") or []:
                module_content = _deep_get(module_item, "item", "itemContent")
                if isinstance(module_content, dict):
                    items.append(module_content)
    return items


def _normalize_explore_items(instructions, count=20):
    # type: (Any, int) -> List[Dict[str, Any]]
    """Return up to ``count`` normalized, de-duplicated stories.

    Raw items are normalized with ``_normalize_explore_story``; items it
    rejects are dropped. Stories sharing the same (title, url) pair are
    emitted once. ``rank`` is the 1-based position in the returned list.
    A non-positive ``count`` yields an empty list.
    """
    # The limit is otherwise only checked after an append, which would let
    # one story through for count <= 0.
    if count <= 0:
        return []

    stories = []  # type: List[Dict[str, Any]]
    seen = set()
    for raw in _extract_explore_item_contents(instructions):
        # Rank is derived from the stories kept so far, so skipped or
        # duplicate items do not leave gaps in the numbering.
        story = _normalize_explore_story(raw, rank=len(stories) + 1)
        if not story:
            continue
        key = (story.get("title"), story.get("url"))
        if key in seen:
            continue
        seen.add(key)
        stories.append(story)
        if len(stories) >= count:
            break
    return stories
_deep_get(timeline, "timeline", "id") + break + if not news_timeline_id: + raise TwitterAPIError(0, "Explore News timeline was not found") + + data = self._graphql_get( + "GenericTimelineById", + { + "timelineId": news_timeline_id, + "count": min(count + 5, 40), + "withQuickPromoteEligibilityTweetFields": True, + }, + FEATURES, + ) + instructions = _deep_get(data, "data", "timeline", "timeline", "instructions") + return _normalize_explore_items(instructions, count=count) + def fetch_tweet_detail(self, tweet_id, count=20): # type: (str, int) -> List[Tweet] """Fetch a tweet and its conversation thread (replies).""" diff --git a/twitter_cli/commands/__init__.py b/twitter_cli/commands/__init__.py index c6e8be2..7d9e13c 100644 --- a/twitter_cli/commands/__init__.py +++ b/twitter_cli/commands/__init__.py @@ -1,7 +1,7 @@ """CLI command sub-modules for twitter-cli. Commands are split into three groups: - - read: feed, bookmarks, search, tweet, article, show, list, favorites + - read: feed, bookmarks, search, news, tweet, article, show, list, favorites - write: post, reply, quote, delete, like/unlike, retweet/unretweet, bookmark/unbookmark - user: user, user-posts, likes, followers, following, whoami, status, follow/unfollow """ diff --git a/twitter_cli/graphql.py b/twitter_cli/graphql.py index d34ea35..cd54d59 100644 --- a/twitter_cli/graphql.py +++ b/twitter_cli/graphql.py @@ -34,6 +34,8 @@ "TweetDetail": "xd_EMdYvB9hfZsZ6Idri0w", "Likes": "lIDpu_NWL7_VhimGGt0o6A", "SearchTimeline": "VhUd6vHVmLBcw0uX-6jMLA", + "ExplorePage": "ZOpNFXhFFI3YQtArxykOLw", + "GenericTimelineById": "wv4VPj4oH-yFD3cuQC7Tbg", "Bookmarks": "2neUNDqrrFzbLui8yallcQ", "ListLatestTweetsTimeline": "RlZzktZY_9wJynoepm8ZsA", "Followers": "IOh4aS6UdGWGJUYTqliQ7Q",