Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions backend/app/downloaders/bilibili_subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

流程:
1. 从 URL 提 BV id(已有 utils.url_parser.extract_video_id)
2. GET /x/web-interface/view?bvid=BVxxx → 拿 cid
3. GET /x/player/wbi/v2?bvid=...&cid=... → 返回 data.subtitle.subtitles[]
2. 从 URL 提 p 参数(分 P 序号,已有 utils.url_parser.extract_bilibili_p_number)
3. GET /x/web-interface/view?bvid=BVxxx&p=N → 拿第 N 集的 cid
4. GET /x/player/wbi/v2?bvid=...&cid=... → 返回 data.subtitle.subtitles[]
每条带 subtitle_url(B 站后端已经签好 auth_key 的完整地址)
4. 按优先级(人工 zh-CN > AI zh-CN > 任意 zh > 任意非空)选一条
5. fetch subtitle_url → JSON {body:[{from,to,content,...}]}
6. 解析为 TranscriptResult
5. 按优先级(人工 zh-CN > AI zh-CN > 任意 zh > 任意非空)选一条
6. fetch subtitle_url → JSON {body:[{from,to,content,...}]}
7. 解析为 TranscriptResult

AI 字幕需要登录态 cookie(SESSDATA);通过 CookieConfigManager 注入。
"""
Expand All @@ -20,7 +21,7 @@
from app.models.transcriber_model import TranscriptResult, TranscriptSegment
from app.services.cookie_manager import CookieConfigManager
from app.utils.logger import get_logger
from app.utils.url_parser import extract_video_id
from app.utils.url_parser import extract_video_id, extract_bilibili_p_number

logger = get_logger(__name__)

Expand All @@ -45,17 +46,33 @@ def _headers(self) -> dict:
h["Cookie"] = self._cookie
return h

def _get_cid(self, bvid: str, p: Optional[int] = None) -> Optional[int]:
    """
    Look up the cid of a Bilibili video (or of one part of a multi-part video).

    Calls the ``/x/web-interface/view`` endpoint and reads the cid from the
    response. When the response carries a ``pages`` list, ``pages[p - 1]`` is
    used for a valid 1-based ``p``; otherwise the first page is used. When
    there is no ``pages`` list, the top-level ``data.cid`` is returned.

    :param bvid: BV id of the video
    :param p: optional 1-based part index; ``None`` or an out-of-range value
        falls back to the first part
    :return: the cid as an int, or ``None`` if the request fails, the API
        reports a non-zero ``code``, or no cid is present
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    params = {"bvid": bvid}
    if p is not None and p >= 1:
        params["p"] = p
    try:
        resp = requests.get(url, params=params, headers=self._headers(), timeout=10)
        data = resp.json()
    except Exception as e:
        # Best-effort lookup: any network/JSON failure just means "no cid".
        logger.warning(f"获取 cid 失败: {e}")
        return None
    if data.get("code") != 0:
        logger.warning(f"view API 返回错误: code={data.get('code')}, msg={data.get('message')}")
        return None

    # `data` may be present but null; normalise so the lookups below cannot
    # raise AttributeError outside the try block.
    payload = data.get("data") or {}
    pages = payload.get("pages") or []

    if not pages:
        # Single-part video: cid lives directly on the payload.
        cid = payload.get("cid")
    elif p is not None and 1 <= p <= len(pages):
        # Multi-part video: pages[N-1] corresponds to part N.
        cid = pages[p - 1].get("cid")
        logger.info(f"分 P 视频: bvid={bvid} p={p} 共 {len(pages)} 集, 取第 {p} 集 cid={cid}")
    else:
        # No p given, or p out of range: fall back to the first part.
        cid = pages[0].get("cid")
        logger.info(f"非分 P 或 p 无效: bvid={bvid} 取第 1 集 cid={cid}")
    return int(cid) if cid else None

Expand Down Expand Up @@ -114,9 +131,12 @@ def fetch_subtitles(self, video_url: str) -> Optional[TranscriptResult]:
logger.info("无法从 URL 提取 BV id")
return None

cid = self._get_cid(bvid)
# 提取分 P 序号
p = extract_bilibili_p_number(video_url)

cid = self._get_cid(bvid, p)
if not cid:
logger.info(f"{bvid} 没有取到 cid")
logger.info(f"{bvid} (p={p}) 没有取到 cid")
return None

subtitles = self._list_subtitles(bvid, cid)
Expand Down Expand Up @@ -149,7 +169,7 @@ def fetch_subtitles(self, video_url: str) -> Optional[TranscriptResult]:
return None

full_text = " ".join(s.text for s in segments)
logger.info(f"B站直拉字幕成功: {bvid} lan={lan} 共 {len(segments)} 段")
logger.info(f"B站直拉字幕成功: {bvid} p={p} lan={lan} 共 {len(segments)} 段")
return TranscriptResult(
language=lan,
full_text=full_text,
Expand All @@ -158,6 +178,7 @@ def fetch_subtitles(self, video_url: str) -> Optional[TranscriptResult]:
"source": "bilibili_player_api",
"bvid": bvid,
"cid": cid,
"p": p,
"lan": lan,
"ai_type": track.get("ai_type"),
},
Expand Down
33 changes: 32 additions & 1 deletion backend/app/utils/url_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from typing import Optional
from typing import Optional, Tuple
import requests


Expand Down Expand Up @@ -48,3 +48,34 @@ def resolve_bilibili_short_url(short_url: str) -> Optional[str]:
except requests.RequestException as e:
print(f"Error resolving short URL: {e}")
return None


def extract_bilibili_p_number(url: str) -> Optional[int]:
    """
    Extract the part index (the ``p`` parameter) from a Bilibili video URL.

    Supported forms:
        - https://www.bilibili.com/video/BVxxx/?p=36
        - https://www.bilibili.com/video/BVxxx?p=5
        - https://b23.tv/xxxxx?p=10
        - https://www.bilibili.com/video/BVxxx/pN  (path-suffix form)

    For ``b23.tv`` links the URL is first resolved via
    ``resolve_bilibili_short_url``; if the resolved URL carries no part index,
    the query string of the original short link is consulted as a fallback.

    :param url: Bilibili video link
    :return: 1-based part index, or ``None`` when the URL has no part index
        or the index is not a positive integer
    """
    query_pattern = r'[?&]p=(\d+)'
    suffix_pattern = r'/p(\d+)(?:/?$|\?|&)'

    resolved = url
    if "b23.tv" in url:
        resolved = resolve_bilibili_short_url(url) or url

    # (text, pattern) pairs in priority order: ?p=N / &p=N first, then the
    # rarer /pN path suffix.
    attempts = [
        (resolved, query_pattern),
        (resolved, suffix_pattern),
    ]
    if resolved != url:
        # The redirect target may not carry a p that was given on the short
        # link. Only the query form is checked here, because a short code
        # could itself look like "/p123".
        attempts.append((url, query_pattern))

    for text, pattern in attempts:
        match = re.search(pattern, text)
        if match:
            p = int(match.group(1))
            # Part indices are 1-based; apply the same validity check to
            # every form so "/p0" is rejected just like "?p=0".
            if p >= 1:
                return p

    return None