From 8f9d35b3b8cecbb249f9a45a062a8ada7b422449 Mon Sep 17 00:00:00 2001 From: Sten Leinasaar <83982253+StenLeinasaar@users.noreply.github.com> Date: Fri, 12 Dec 2025 13:17:44 +0200 Subject: [PATCH 1/2] added custom secrets management --- .pre-commit-hooks.yaml | 7 ++ pre_commit_hooks/detect_secrets.py | 123 +++++++++++++++++++++++++++++ setup.cfg | 1 + 3 files changed, 131 insertions(+) create mode 100644 pre_commit_hooks/detect_secrets.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 275605eb..1b5810d3 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -210,3 +210,10 @@ types: [text] stages: [pre-commit, pre-push, manual] minimum_pre_commit_version: 3.2.0 + +- id: detect-secrets + name: detects secrets from custom regex file + description: can take in a custom regex file to scan for custom secrets. + entry: detect-secrets + langauge: python + diff --git a/pre_commit_hooks/detect_secrets.py b/pre_commit_hooks/detect_secrets.py new file mode 100644 index 00000000..72a478a9 --- /dev/null +++ b/pre_commit_hooks/detect_secrets.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import argparse +import re +import subprocess +from collections.abc import Sequence +from pathlib import Path + + +# ------------------------- +# Default secret patterns +# ------------------------- + +DEFAULT_PATTERNS: dict[str, str] = { + # GitLab + "gitlab_pat": r"glpat-[0-9A-Za-z_-]{20,}", + "gitlab_runner_token": r"glrt-[0-9A-Za-z_-]{20,}", + + # GitHub + "github_pat": r"ghp_[0-9A-Za-z]{36}", + "github_fine_grained_pat": r"github_pat_[0-9A-Za-z_]{82}", + + # AWS + "aws_access_key": r"AKIA[0-9A-Z]{16}", + "aws_secret_key": r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]", + + # Generic + "generic_secret": r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]", +} + + + +def load_custom_patterns(path: Path) -> dict[str, str]: + patterns: dict[str, str] = {} + for i, line in enumerate(path.read_text().splitlines(), start=1): + line = line.strip() + if not line or line.startswith("#"): + continue + patterns[f"custom_rule_{i}"] = line + return patterns + + +def is_binary(data: bytes) -> bool: + return b"\x00" in data + + +def git_tracked_files() -> list[Path]: + """Return all git-tracked files in the repo.""" + result = subprocess.run( + ["git", "ls-files"], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + check=False, + ) + return [Path(p) for p in result.stdout.splitlines() if p] + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Detect exposed secrets in repository") + parser.add_argument( + "--rules", + type=Path, + help="File containing custom regex rules (one per line)", + ) + parser.add_argument( + "filenames", + nargs="*", + help="Files to scan (if empty, scans entire repo)", + ) + + args = parser.parse_args(argv) + + patterns = dict(DEFAULT_PATTERNS) + + if args.rules: + if not args.rules.is_file(): + print(f"Rules file not found: {args.rules}") + return 2 + patterns.update(load_custom_patterns(args.rules)) + + compiled = { + name: re.compile(regex) + for name, regex in patterns.items() + } + + files: list[Path] + if args.filenames: + files = [Path(f) for f in args.filenames] + else: + files = git_tracked_files() + + findings: list[tuple[Path, str]] = [] + + for path in files: + if not path.is_file(): + continue + + try: + data = path.read_bytes() + except OSError: + continue + + if is_binary(data): + continue + + text = data.decode(errors="ignore") + + for rule, regex in compiled.items(): + if regex.search(text): + findings.append((path, rule)) + + if findings: + print("Potential secrets detected:") + for path, rule in findings: + print(f" - {path} (matched: {rule})") + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/setup.cfg b/setup.cfg index d91f4399..f5876526 100644 --- a/setup.cfg +++ b/setup.cfg @@ -60,6 +60,7 @@ console_scripts = requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:main sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main + detect-secrets = pre_commit_hooks.detect_secrets:main [bdist_wheel] universal = True From 16d54fd3e50bb3285474a08084012cf69767b801 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Dec 2025 11:20:25 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .pre-commit-hooks.yaml | 1 - pre_commit_hooks/detect_secrets.py | 39 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 1b5810d3..6f9c2b7c 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -216,4 +216,3 @@ description: can take in a custom regex file to scan for custom secrets. entry: detect-secrets langauge: python - diff --git a/pre_commit_hooks/detect_secrets.py b/pre_commit_hooks/detect_secrets.py index 72a478a9..7a0f5375 100644 --- a/pre_commit_hooks/detect_secrets.py +++ b/pre_commit_hooks/detect_secrets.py @@ -13,41 +13,40 @@ DEFAULT_PATTERNS: dict[str, str] = { # GitLab - "gitlab_pat": r"glpat-[0-9A-Za-z_-]{20,}", - "gitlab_runner_token": r"glrt-[0-9A-Za-z_-]{20,}", + 'gitlab_pat': r'glpat-[0-9A-Za-z_-]{20,}', + 'gitlab_runner_token': r'glrt-[0-9A-Za-z_-]{20,}', # GitHub - "github_pat": r"ghp_[0-9A-Za-z]{36}", - "github_fine_grained_pat": r"github_pat_[0-9A-Za-z_]{82}", + 'github_pat': r'ghp_[0-9A-Za-z]{36}', + 'github_fine_grained_pat': r'github_pat_[0-9A-Za-z_]{82}', # AWS - "aws_access_key": r"AKIA[0-9A-Z]{16}", - "aws_secret_key": r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]", + 'aws_access_key': r'AKIA[0-9A-Z]{16}', + 'aws_secret_key': r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]", # Generic - "generic_secret": r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]", + 'generic_secret': r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]", } - def load_custom_patterns(path: Path) -> dict[str, str]: patterns: dict[str, str] = {} for i, line in enumerate(path.read_text().splitlines(), start=1): line = line.strip() - if not line or line.startswith("#"): + if not line or line.startswith('#'): continue patterns[f"custom_rule_{i}"] = line return patterns def is_binary(data: bytes) -> bool: - return b"\x00" in data + return b'\x00' in data def git_tracked_files() -> list[Path]: """Return all git-tracked files in the repo.""" result = subprocess.run( - ["git", "ls-files"], + ['git', 'ls-files'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, @@ -57,16 +56,16 @@ def git_tracked_files() -> list[Path]: def main(argv: Sequence[str] | None = None) -> int: - parser = argparse.ArgumentParser(description="Detect exposed secrets in repository") + parser = argparse.ArgumentParser(description='Detect exposed secrets in repository') parser.add_argument( - "--rules", + '--rules', type=Path, - help="File containing custom regex rules (one per line)", + help='File containing custom regex rules (one per line)', ) parser.add_argument( - "filenames", - nargs="*", - help="Files to scan (if empty, scans entire repo)", + 'filenames', + nargs='*', + help='Files to scan (if empty, scans entire repo)', ) args = parser.parse_args(argv) @@ -104,14 +103,14 @@ def main(argv: Sequence[str] | None = None) -> int: if is_binary(data): continue - text = data.decode(errors="ignore") + text = data.decode(errors='ignore') for rule, regex in compiled.items(): if regex.search(text): findings.append((path, rule)) if findings: - print("Potential secrets detected:") + print('Potential secrets detected:') for path, rule in findings: print(f" - {path} (matched: {rule})") return 1 @@ -119,5 +118,5 @@ def main(argv: Sequence[str] | None = None) -> int: return 0 -if __name__ == "__main__": +if __name__ == '__main__': raise SystemExit(main())