-
Notifications
You must be signed in to change notification settings - Fork 79
Expand file tree
/
Copy pathurlscan.py
More file actions
104 lines (80 loc) · 3.14 KB
/
urlscan.py
File metadata and controls
104 lines (80 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/local/python3.13/bin/python3.13
import requests
import argparse
import re
import os
# urlscan.io API key; it is sent in the "API-Key" request header.
API_KEY = "xxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"  # ← Insert your API Key here

# The shipped placeholder is a non-empty string, so a plain truthiness test
# never fires for it. Treat an empty value, or one made up solely of "x" and
# "-" characters (i.e. the unedited placeholder), as "no key configured".
if not API_KEY.strip() or set(API_KEY.strip()) <= set("x-"):
    print("[ERROR] Please add your API key in the script before running.")
    # SystemExit instead of exit(): exit() is injected by the site module and
    # is not guaranteed to exist in every interpreter configuration.
    raise SystemExit(1)
# Command-line interface: one scan mode plus exactly one source of domains.
parser = argparse.ArgumentParser(description="Automating usage of urlscan.io")
parser.add_argument(
    '-m', '--mode',
    required=True,
    choices=['subdomains', 'urls'],
    help="Mode to scan: subdomains or urls.",
)
# -d and -df are alternatives. Declaring them as a required mutually exclusive
# group makes argparse reject "both" (previously --domain-file was silently
# ignored when --domain was also given) as well as "neither".
_target_group = parser.add_mutually_exclusive_group(required=True)
_target_group.add_argument('-d', '--domain', help="Single domain to scan.")
_target_group.add_argument(
    '-df', '--domain-file',
    help="File containing multiple domains to scan.",
)
args = parser.parse_args()
def print_banner():
    """Write the tool's ASCII-art banner and author credit to stdout."""
    # Raw string: the art contains backslashes that must be emitted verbatim.
    banner = r'''
__ _
__ _______/ /_____________ _____ (_)___
/ / / / ___/ / ___/ ___/ __ `/ __ \ / / __ \
/ /_/ / / / (__ ) /__/ /_/ / / / / / / /_/ /
\__,_/_/ /_/____/\___/\__,_/_/ /_(_)_/\____/
Created by: coffinxp
'''
    print(banner)
def sanitize_domain(domain):
    """Normalize one user-supplied domain entry to a bare lowercase host name.

    Surrounding whitespace and a leading ``http://`` / ``https://`` scheme are
    removed, and everything from the first ``/``, ``?`` or ``#`` onwards (path,
    query string, fragment or trailing comment) is dropped, so that
    ``https://Example.com/`` becomes ``example.com``.

    Args:
        domain: Raw text such as a command-line value or a line of a file.

    Returns:
        The cleaned host name, or ``None`` when the entry is blank or is a
        comment line starting with ``#``.
    """
    domain = domain.strip().lower()
    if not domain or domain.startswith('#'):
        return None
    domain = re.sub(r'^https?://', '', domain)  # remove http/https
    # Keep only the host part: a leftover "/" or path would otherwise end up
    # inside both the search query and the extraction patterns built from it.
    domain = re.split(r'[/?#]', domain, maxsplit=1)[0].strip()
    return domain or None
def safe_request(url, headers):
    """Send a GET request and return the response, or ``None`` on failure.

    Args:
        url: Address to fetch.
        headers: HTTP headers to send with the request.

    Returns:
        The response object when the request completes (whatever its HTTP
        status), or ``None`` when ``requests`` raised a ``RequestException``.
    """
    import sys  # local import: only used for the stderr report below

    try:
        # The timeout keeps a single unresponsive request from hanging a run.
        return requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Stay best-effort (callers just skip on None), but say what went
        # wrong; stderr keeps the message out of the results on stdout.
        print(f"[!] Request failed for {url}: {exc}", file=sys.stderr)
        return None
def dedup_and_sort(items):
    """Return the distinct elements of *items* as a list in ascending order."""
    ordered = list(set(items))
    ordered.sort()
    return ordered
def _extract_subdomains(text, domain):
    """Return the lowercased host names under *domain* found after http(s):// in *text*."""
    pattern = re.compile(
        rf"https?://((?:[a-zA-Z0-9_-]+\.)+{re.escape(domain)})"
        # Right-hand boundary: without it "a.example.com.evil.org" or
        # "a.example.community" would be reported as "a.example.com".
        r"(?![a-zA-Z0-9_-])(?!\.[a-zA-Z0-9_-])",
        re.IGNORECASE,
    )
    # The group captures the bare host; at least one label is required in
    # front of the domain, so the domain itself can never be returned.
    return [host.lower() for host in pattern.findall(text)]


def _extract_urls(text, domain):
    """Return the http(s) URLs in *text* whose host is *domain* or one of its subdomains."""
    pattern = re.compile(
        # "*" rather than "+": URLs on the queried domain itself count too,
        # not only those on its subdomains.
        rf"https?://(?:[a-zA-Z0-9_-]+\.)*{re.escape(domain)}/[^\s\"'>]+",
        re.IGNORECASE,
    )
    return pattern.findall(text)


def scan_domain(domain, mode, api_key):
    """Query the urlscan.io search API for one domain and print what it finds.

    The raw response body is scanned with regular expressions; every distinct
    match is printed on its own line, in sorted order.

    Args:
        domain: Domain to look up; it is normalized with ``sanitize_domain``
            and the call is a no-op when that yields nothing.
        mode: ``"subdomains"`` to print host names under the domain, or
            ``"urls"`` to print full URLs on the domain and its subdomains.
            Any other value prints nothing.
        api_key: Value sent in the ``API-Key`` request header.

    Returns:
        None. Results go to stdout, problems to stderr.
    """
    import sys  # local import: only used for the stderr report below

    domain = sanitize_domain(domain)
    if not domain:
        return

    url = f"https://urlscan.io/api/v1/search/?q=page.domain:{domain}&size=100"
    response = safe_request(url, headers={"API-Key": api_key})
    if response is None:
        return
    if not response:
        # A requests Response is falsy for 4xx/5xx statuses; report it rather
        # than letting a rejected key or rate limit look like "no results".
        print(
            f"[!] urlscan.io returned HTTP {response.status_code} for {domain}",
            file=sys.stderr,
        )
        return

    if mode == "subdomains":
        results = _extract_subdomains(response.text, domain)
    elif mode == "urls":
        results = _extract_urls(response.text, domain)
    else:
        results = []

    for item in dedup_and_sort(results):
        print(item)  # Show on screen
# ---------------- MAIN ----------------
# Script body: show the banner, collect the domains named on the command line
# (one domain or a file of them), scan each one, then print a summary line.
print_banner()

domains_to_scan = []

if args.domain:
    # Use single domain
    single = sanitize_domain(args.domain)
    if not single:
        print("[!] Invalid domain input.")
        raise SystemExit(1)
    domains_to_scan = [single]
elif args.domain_file:
    # Use domain file
    if not os.path.isfile(args.domain_file):
        print(f"[!] File not found: {args.domain_file}")
        raise SystemExit(1)
    # Explicit encoding so the result does not depend on the platform locale;
    # errors="replace" keeps a stray undecodable byte from aborting the run.
    with open(args.domain_file, 'r', encoding="utf-8", errors="replace") as f:
        # Sanitize each line once; blank and comment lines come back as None.
        cleaned = [sanitize_domain(line) for line in f]
    # dict.fromkeys drops repeated domains while keeping file order, so the
    # same domain is not queried more than once.
    domains_to_scan = list(dict.fromkeys(entry for entry in cleaned if entry))
else:
    print("[!] Please provide either a single domain (-d) or a domain file (-df).")
    raise SystemExit(1)

for domain in domains_to_scan:
    scan_domain(domain, args.mode, API_KEY)

print(f"\n[✔] Completed scan for {len(domains_to_scan)} domain(s).")