diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index 275605eb..6f9c2b7c 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -210,3 +210,9 @@
     types: [text]
     stages: [pre-commit, pre-push, manual]
     minimum_pre_commit_version: 3.2.0
+
+-   id: detect-secrets
+    name: detects secrets from custom regex file
+    description: can take in a custom regex file to scan for custom secrets.
+    entry: detect-secrets
+    language: python
diff --git a/pre_commit_hooks/detect_secrets.py b/pre_commit_hooks/detect_secrets.py
new file mode 100644
index 00000000..7a0f5375
--- /dev/null
+++ b/pre_commit_hooks/detect_secrets.py
@@ -0,0 +1,158 @@
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import subprocess
+from collections.abc import Sequence
+from pathlib import Path
+
+
+# -------------------------
+# Default secret patterns
+# -------------------------
+
+# Rule name (shown in the report when it matches) -> regex source.
+DEFAULT_PATTERNS: dict[str, str] = {
+    # GitLab
+    'gitlab_pat': r'glpat-[0-9A-Za-z_-]{20,}',
+    'gitlab_runner_token': r'glrt-[0-9A-Za-z_-]{20,}',
+
+    # GitHub
+    'github_pat': r'ghp_[0-9A-Za-z]{36}',
+    'github_fine_grained_pat': r'github_pat_[0-9A-Za-z_]{82}',
+
+    # AWS
+    'aws_access_key': r'AKIA[0-9A-Z]{16}',
+    'aws_secret_key': r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]",
+
+    # Generic
+    'generic_secret': r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]",
+}
+
+
+def load_custom_patterns(path: Path) -> dict[str, str]:
+    """Read custom rules from *path*, one regex per line.
+
+    Blank lines and lines starting with ``#`` are skipped. Each rule is
+    named ``custom_rule_<N>`` where N is its 1-based line number in the
+    file.
+    """
+    patterns: dict[str, str] = {}
+    # Decode explicitly so a rules file reads the same on every platform.
+    lines = path.read_text(encoding='utf-8').splitlines()
+    for i, line in enumerate(lines, start=1):
+        line = line.strip()
+        if not line or line.startswith('#'):
+            continue
+        patterns[f"custom_rule_{i}"] = line
+    return patterns
+
+
+def is_binary(data: bytes) -> bool:
+    """Return True when *data* contains a NUL byte."""
+    return b'\x00' in data
+
+
+def git_tracked_files() -> list[Path]:
+    """Return all git-tracked files in the repo.
+
+    Raises ``subprocess.CalledProcessError`` when ``git ls-files`` exits
+    non-zero and ``OSError`` when ``git`` cannot be started.
+    """
+    # -z yields NUL-separated, unquoted paths; the default output C-quotes
+    # unusual filenames, which would then fail the is_file() check.
+    result = subprocess.run(
+        ['git', 'ls-files', '-z'],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+        check=True,
+    )
+    return [Path(os.fsdecode(p)) for p in result.stdout.split(b'\0') if p]
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Scan files for secrets and report every matching rule.
+
+    Returns 0 when nothing matched, 1 when at least one rule matched, and
+    2 when the scan could not run (rules file missing or unreadable,
+    invalid regex, or git-tracked files could not be listed).
+    """
+    parser = argparse.ArgumentParser(description='Detect exposed secrets in repository')
+    parser.add_argument(
+        '--rules',
+        type=Path,
+        help='File containing custom regex rules (one per line)',
+    )
+    parser.add_argument(
+        'filenames',
+        nargs='*',
+        help='Files to scan (if empty, scans entire repo)',
+    )
+
+    args = parser.parse_args(argv)
+
+    patterns = dict(DEFAULT_PATTERNS)
+
+    if args.rules:
+        if not args.rules.is_file():
+            print(f"Rules file not found: {args.rules}")
+            return 2
+        try:
+            patterns.update(load_custom_patterns(args.rules))
+        except (OSError, UnicodeDecodeError) as exc:
+            print(f"Could not read rules file {args.rules}: {exc}")
+            return 2
+
+    compiled: dict[str, re.Pattern[str]] = {}
+    for name, regex in patterns.items():
+        try:
+            compiled[name] = re.compile(regex)
+        except re.error as exc:
+            # A bad custom rule is a usage error, not a crash.
+            print(f"Invalid regex for {name}: {exc}")
+            return 2
+
+    files: list[Path]
+    if args.filenames:
+        files = [Path(f) for f in args.filenames]
+    else:
+        try:
+            files = git_tracked_files()
+        except (OSError, subprocess.CalledProcessError) as exc:
+            # Fail closed: scanning nothing must not look like a clean run.
+            print(f"Could not list git-tracked files: {exc}")
+            return 2
+
+    findings: list[tuple[Path, str]] = []
+
+    for path in files:
+        if not path.is_file():
+            continue
+
+        try:
+            data = path.read_bytes()
+        except OSError:
+            continue
+
+        if is_binary(data):
+            continue
+
+        # errors='ignore' drops undecodable bytes instead of aborting the scan.
+        text = data.decode(errors='ignore')
+
+        for rule, regex in compiled.items():
+            if regex.search(text):
+                findings.append((path, rule))
+
+    if findings:
+        print('Potential secrets detected:')
+        for path, rule in findings:
+            print(f" - {path} (matched: {rule})")
+        return 1
+
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/setup.cfg b/setup.cfg
index d91f4399..f5876526 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -60,6 +60,7 @@ console_scripts =
     requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:main
     sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main
     trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main
+    detect-secrets = pre_commit_hooks.detect_secrets:main
 
 [bdist_wheel]
 universal = True