diff --git a/assets/sec_ch_ua.json b/assets/sec_ch_ua.json new file mode 100644 index 0000000..626ff96 --- /dev/null +++ b/assets/sec_ch_ua.json @@ -0,0 +1 @@ +"{\"132.0.0.0\": \"\\\"Not A(Brand\\\";v=\\\"8\\\", \\\"Chromium\\\";v=\\\"132\\\", \\\"Google Chrome\\\";v=\\\"132\\\"\", \"131.0.0.0\": \"\\\"Google Chrome\\\";v=\\\"131\\\", \\\"Chromium\\\";v=\\\"131\\\", \\\"Not_A Brand\\\";v=\\\"24\\\"\", \"130.0.0.0\": \"\\\"Chromium\\\";v=\\\"130\\\", \\\"Google Chrome\\\";v=\\\"130\\\", \\\"Not?A_Brand\\\";v=\\\"99\\\"\", \"129.0.0.0\": \"\\\"Google Chrome\\\";v=\\\"129\\\", \\\"Not=A?Brand\\\";v=\\\"8\\\", \\\"Chromium\\\";v=\\\"129\\\"\"}" \ No newline at end of file diff --git a/src/masquer/utils/assets.py b/src/masquer/utils/assets.py index 9d5df96..f701740 100644 --- a/src/masquer/utils/assets.py +++ b/src/masquer/utils/assets.py @@ -1,53 +1,6 @@ -HEADER_DATA = { - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "Accept-Encoding": "gzip, deflate, br", - "Accept-Language": "en-US,en;q=0.5;", - "Referer": "https://www.google.com/", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1", - "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.3", -} -REFERERS = [ - "https://www.google.com", - "https://bing.com", - "https://yandex.com", - "https://search.yahoo.com", - "https://duckduckgo.com", - "https://www.baidu.com", -] +HEADER_DATA = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.5;', 'Referer': 'https://www.google.com/', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 
'Sec-Fetch-User': '?1', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.3'} +REFERERS = ['https://www.google.com', 'https://bing.com', 'https://yandex.com', 'https://search.yahoo.com', 'https://duckduckgo.com', 'https://www.baidu.com'] REFERER_WEIGHTS = [79.1, 11.92, 3.02, 2.99, 0.84, 0.77] -USERAGENTS = [ - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.1", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.3", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.", - "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Herring/97.1.8280.8", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/115.0.0.", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 AtContent/95.5.5462.5", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.1958", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.3", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.", - "Mozilla/5.0 (Windows NT 
10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3", -] -USERAGENT_WEIGHTS = [ - 31.48, - 24.07, - 17.59, - 7.41, - 4.63, - 3.7, - 2.78, - 1.85, - 1.85, - 0.93, - 0.93, - 0.93, - 0.93, - 0.93, -] +USERAGENTS = ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.1', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Herring/97.1.8280.8', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/115.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 AtContent/95.5.5462.5', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.1958', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.3', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3'] +USERAGENT_WEIGHTS = [31.48, 24.07, 17.59, 7.41, 4.63, 3.7, 2.78, 1.85, 1.85, 0.93, 0.93, 0.93, 0.93, 0.93] 
import re
from typing import Optional

# --- generated asset data (written to src/masquer/utils/assets.py) ---------
# sec-ch-ua brand lists keyed by the full Chrome version found in a
# user-agent string.
SEC_CH_UA = {
    "132.0.0.0": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
    "131.0.0.0": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    "130.0.0.0": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "129.0.0.0": '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
}

# --- src/masquer/utils/select.py --------------------------------------------
# Captures the four-part Chrome version, e.g. "131.0.0.0".
VERSION_RE = re.compile(r"Chrome\/(\d+\.\d+\.\d+\.\d+)")

# Ordered (marker, platform) pairs; the first marker found in the
# user-agent wins, so "Android" is tested before the desktop markers.
_PLATFORM_MARKERS = (
    ("Android", '"Android"'),
    ("Linux x86_64", '"Linux"'),
    ("Windows NT", '"Windows"'),
    ("Macintosh", '"macOS"'),
    ("iPhone", '"iOS"'),
)

# Ordered (marker, brand) pairs for Chromium-based browsers; first match wins.
_BROWSER_MARKERS = (
    ("Brave", "Brave"),
    ("Edg", "Microsoft Edge"),
    ("OPR", "Opera"),
    ("SamsungBrowser", "Samsung Internet"),
    ("OPX", "Opera GX"),
)


def select_ch_ua(user_agent: str, sec_ch_uas: dict) -> Optional[dict]:
    """
    Builds the sec-ch-ua client-hint values that match a user-agent string

    Args:
        user_agent: user-agent string to derive the hints from
        sec_ch_uas: mapping of full Chrome version (e.g. "131.0.0.0") to the
            sec-ch-ua brand list for that version

    Returns:
        dict with the keys "sec-ch-ua-mobile", "sec-ch-ua-platform" and
        "sec-ch-ua", or None when the user-agent has no Chrome version or
        that version has no entry in sec_ch_uas
    """
    # Only the two expected "no data" cases yield None; anything else (for
    # example a wrong argument type) is a real error and is left to raise.
    match = VERSION_RE.search(user_agent)
    if match is None:
        return None

    brands = sec_ch_uas.get(match.group(1))
    if brands is None:
        return None

    return {
        "sec-ch-ua-mobile": "?1" if is_mobile(user_agent) else "?0",
        "sec-ch-ua-platform": get_platform(user_agent),
        # The stored brand list names "Google Chrome"; swap in the brand of
        # the browser the user-agent actually identifies as.
        "sec-ch-ua": brands.replace("Google Chrome", find_browser_name(user_agent)),
    }


def is_mobile(user_agent: str) -> bool:
    """
    Checks whether a user-agent is mobile
    Returns True if the string contains "Mobile", else False
    """
    return "Mobile" in user_agent


def get_platform(user_agent: str) -> str:
    """
    Returns the platform of a user-agent as a double-quoted string

    Falls back to '"Unknown"' when no known platform marker is present.
    """
    for marker, platform in _PLATFORM_MARKERS:
        if marker in user_agent:
            return platform
    # Quoted like every other return value so the result can be used as a
    # header value without special-casing the fallback.
    return '"Unknown"'


def find_browser_name(user_agent: str) -> str:
    """
    Returns the browser brand name for a user-agent string

    Defaults to "Google Chrome" when no other known browser marker is found.
    """
    for marker, brand in _BROWSER_MARKERS:
        if marker in user_agent:
            return brand
    return "Google Chrome"
def update_useragents() -> bool: @@ -97,6 +100,58 @@ def extract_data(json_file_path: str) -> dict | list[dict]: data = json.load(f) return data +def update_sec_ch_ua() -> bool: + """Gets latest referer stats and saves them to JSON file""" + REPO_OWNER = "fa0311" + REPO_NAME = "latest-user-agent" + BASE_URL = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}" + FILE_PATH = "header.json" + + latest_commits = get_latest_commits(f"{BASE_URL}/commits") + sec_ch_uas = {} + logger.info("Fetched sec-ch-ua data") + for commit in latest_commits: + file_content = get_file_content_at_commit(f"{BASE_URL}/contents/{FILE_PATH}", commit) + version, sec_ch_ua = extract_sec_ch_ua(file_content) + if sec_ch_ua: + sec_ch_uas[version] = sec_ch_ua + json_string = json.dumps(sec_ch_uas) + with open(os.path.join(ASSETS_DIR, "sec_ch_ua.json"), "w") as f: + json.dump(json_string, f) + return True + + +def get_latest_commits(url: str): + """Fetch the latest 5 commits that modified the file.""" + FILE_PATH = "header.json" + params = { + "path": FILE_PATH, + "per_page": 10 # Limit to the latest 5 commits + } + response = requests.get(url, params=params) + response.raise_for_status() + return [commit["sha"] for commit in response.json()] + + +def get_file_content_at_commit(url: str, commit_sha: str): + """Fetch the file content at a specific commit.""" + params = { + "ref": commit_sha + } + response = requests.get(url, params=params) + response.raise_for_status() + content = response.json()["content"] + # Decode the base64-encoded content + return base64.b64decode(content).decode("utf-8") + +def extract_sec_ch_ua(json_content): + """Extract the 'sec-ch-ua' value from the JSON content.""" + data = json.loads(json_content) + user_agent = data.get("chrome", {}).get("user-agent", None) + if not user_agent: + return None, None + version = VERSION_RE.search(user_agent) + return version.group(1), data.get("chrome", {}).get("sec-ch-ua", None) def update_assets() -> bool: """ @@ -109,6 +164,7 @@ 
def update_assets() -> bool: header_data = extract_data(os.path.join(ASSETS_DIR, "header.json")) referer_data = extract_data(os.path.join(ASSETS_DIR, "referers.json")) useragent_data = extract_data(os.path.join(ASSETS_DIR, "useragents.json")) + sec_ch_ua_data = extract_data(os.path.join(ASSETS_DIR, "sec_ch_ua.json")) referers = [obj["ref"] for obj in referer_data] referer_weights = [obj["pct"] for obj in referer_data] @@ -129,8 +185,10 @@ def update_assets() -> bool: f.write("\n") f.write("USERAGENT_WEIGHTS = " + str(useragent_weights)) f.write("\n") + f.write("SEC_CH_UA = " + str(sec_ch_ua_data)) + f.write("\n") - logger.info("Saved user-agent and referer JSON data to assets.py") + logger.info("Saved user-agent, referer and sec-ch-ua JSON data to assets.py") return True except FileNotFoundError: @@ -145,7 +203,9 @@ def update_assets() -> bool: if __name__ == "__main__": ua = update_useragents() rf = update_referers() - if ua and rf: + sec_ch_ua = update_sec_ch_ua() + + if ua and rf and sec_ch_ua: assets_updated = update_assets() if assets_updated: sys.exit(0)