diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py index 1120efc..5def64d 100644 --- a/codesectools/sasts/all/parser.py +++ b/codesectools/sasts/all/parser.py @@ -132,73 +132,85 @@ def stats_by_scores(self) -> dict: for defect_file, defects in defect_files.items(): defects_cwes = {d.cwe for d in defects if d.cwe.id != -1} - defects_same_cwe = 0 + same_cwe = 0 for cwe in defects_cwes: cwes_sasts = {d.sast_name for d in defects if d.cwe == cwe} if set(self.sast_names) == cwes_sasts: - defects_same_cwe += 1 + same_cwe += 1 else: - defects_same_cwe += ( - len(set(self.sast_names) & cwes_sasts) - 1 - ) / len(self.sast_names) + same_cwe += (len(set(self.sast_names) & cwes_sasts) - 1) / len( + self.sast_names + ) + defects_severity = [] defect_locations = {} for defect in defects: + defects_severity.append( + {"error": 1, "warning": 0.5, "note": 0.25, "none": 0.125}[ + defect.level + ] + ) + for line in defect.lines: if not defect_locations.get(line): defect_locations[line] = [] defect_locations[line].append(defect) - defects_same_location = 0 - defects_same_location_same_cwe = 0 + same_location = 0 + same_location_same_cwe = 0 for _, defects_ in defect_locations.items(): + same_location_coeff = 0 if set(defect.sast_name for defect in defects_) == set(self.sast_names): - defects_same_location += 1 - defects_by_cwe = {} - for defect in defects_: - if not defects_by_cwe.get(defect.cwe): - defects_by_cwe[defect.cwe] = [] - defects_by_cwe[defect.cwe].append(defect) - - for _, defects_ in defects_by_cwe.items(): - if set(defect.sast_name for defect in defects_) == set( - self.sast_names - ): - defects_same_location_same_cwe += 1 - else: - defects_same_location_same_cwe += ( + same_location_coeff = 1 + else: + same_location_coeff = ( + len( + set(defect.sast_name for defect in defects_) + & set(self.sast_names) + ) + - 1 + ) / len(set(self.sast_names)) + same_location += same_location_coeff + + defects_by_cwe = {} + for defect in defects_: + if not defects_by_cwe.get(defect.cwe): + defects_by_cwe[defect.cwe] = [] + defects_by_cwe[defect.cwe].append(defect) + + for _, defects_ in defects_by_cwe.items(): + if set(defect.sast_name for defect in defects_) == set( + self.sast_names + ): + same_location_same_cwe += same_location_coeff * 1 + else: + same_location_same_cwe += ( + same_location_coeff + * ( len( set(defect.sast_name for defect in defects_) & set(self.sast_names) ) - 1 - ) / len(self.sast_names) + ) + / len(self.sast_names) + ) stats[defect_file] = { "score": { - "defect_number": len(defects), - "defects_same_cwe": defects_same_cwe * 2, - "defects_same_location": defects_same_location * 4, - "defects_same_location_same_cwe": defects_same_location_same_cwe - * 8, - }, - "count": { - "defect_number": len(defects), - "defects_same_cwe": defects_same_cwe, - "defects_same_location": defects_same_location, - "defects_same_location_same_cwe": defects_same_location_same_cwe, + "severity": sum(defects_severity) / len(defects_severity), + "same_cwe": same_cwe * 2, + "same_location": same_location * 4, + "same_location_same_cwe": same_location_same_cwe * 8, }, } - return stats def prepare_report_data(self) -> dict: """Prepare data needed to generate a report.""" - report = {"score": {}, "files": {}} + report = {} scores = self.stats_by_scores() - report["score"] = {k: 0 for k, _ in list(scores.values())[0]["score"].items()} - defect_files = {} for defect in self.defects: if defect.filepath_str not in defect_files: @@ -206,9 +218,6 @@ def prepare_report_data(self) -> dict: defect_files[defect.filepath_str].append(defect) for defect_file, defects in defect_files.items(): - for k, v in scores[defect_file]["score"].items(): - report["score"][k] += v - locations = [] for defect in defects: for group in group_successive(defect.lines): @@ -217,19 +226,18 @@ def prepare_report_data(self) -> dict: (defect.sast_name, defect.cwe, defect.message, (start, end)) ) - report["files"][defect_file] = { - "score": scores[defect_file]["score"], - "count": scores[defect_file]["count"], + report[defect_file] = { + "score": sum(v for v in scores[defect_file]["score"].values()), "source_path": str(self.source_path / defect.filepath), "locations": locations, "defects": defects, } - report["files"] = { + report = { k: v for k, v in sorted( - report["files"].items(), - key=lambda item: sum(v for v in item[1]["score"].values()), + report.items(), + key=lambda item: item[1]["score"], reverse=True, ) } diff --git a/codesectools/sasts/all/report.py b/codesectools/sasts/all/report.py index 3476bc3..b53c5a8 100644 --- a/codesectools/sasts/all/report.py +++ b/codesectools/sasts/all/report.py @@ -4,10 +4,8 @@ from hashlib import sha256 from pathlib import Path -from rich import print - from codesectools.sasts.all.sast import AllSAST -from codesectools.utils import group_successive, shorten_path +from codesectools.utils import group_successive class ReportEngine: @@ -82,7 +80,7 @@ def __init__(self, project: str, all_sast: AllSAST) -> None: self.result = all_sast.parser.load_from_output_dir(project_name=project) self.report_data = self.result.prepare_report_data() - def generate_single_defect(self, file_data: dict) -> tuple: + def generate_single_defect(self, defect_file: dict) -> str: """Generate the HTML report for a single file with defects.""" from rich.console import Console from rich.style import Style @@ -90,30 +88,9 @@ def generate_single_defect(self, file_data: dict) -> tuple: from rich.table import Table from rich.text import Text - file_report_name = ( - f"{sha256(file_data['source_path'].encode()).hexdigest()}.html" - ) file_page = Console(record=True, file=io.StringIO()) - # Defect stat table - file_stats_table = Table(title="") - for key in list(self.report_data["files"].values())[0]["count"].keys(): - file_stats_table.add_column(key.replace("_", " ").title(), justify="center") - - rendered_scores = [] - for v in file_data["count"].values(): - if isinstance(v, float): - rendered_scores.append(f"~{v}") - else: - rendered_scores.append(str(v)) - - file_stats_table.add_row(*rendered_scores) - file_page.print(file_stats_table) - - file_report_redirect = Text( - shorten_path(file_data["source_path"], 60), - style=Style(link=file_report_name), - ) + file_page.print(f"Score: {defect_file['score']:.2f}") # Defect table defect_table = Table(title="", show_lines=True) @@ -122,7 +99,7 @@ def generate_single_defect(self, file_data: dict) -> tuple: defect_table.add_column("CWE", justify="center") defect_table.add_column("Message") rows = [] - for defect in file_data["defects"]: + for defect in defect_file["defects"]: groups = group_successive(defect.lines) if groups: for group in groups: @@ -161,14 +138,14 @@ def generate_single_defect(self, file_data: dict) -> tuple: file_page.print(defect_table) # Syntax - if not Path(file_data["source_path"]).is_file(): + if not Path(defect_file["source_path"]).is_file(): tippy_calls = "" - print(f"Source file {file_data['source_path']} not found, skipping it...") + print(f"Source file {defect_file['source_path']} not found, skipping it...") else: - syntax = Syntax.from_path(file_data["source_path"], line_numbers=True) + syntax = Syntax.from_path(defect_file["source_path"], line_numbers=True) tooltips = {} highlights = {} - for location in file_data["locations"]: + for location in defect_file["locations"]: sast, cwe, message, (start, end) = location for i in range(start, end + 1): text = ( @@ -199,13 +176,10 @@ def generate_single_defect(self, file_data: dict) -> tuple: html_content = file_page.export_html(code_format=self.TEMPLATE) html_content = html_content.replace('href="HACK', 'id="') - html_content = html_content.replace("[name]", file_data["source_path"]) + html_content = html_content.replace("[name]", defect_file["source_path"]) html_content = html_content.replace("[tippy_calls]", tippy_calls) - report_file = self.report_dir / file_report_name - report_file.write_text(html_content) - - return file_report_redirect, rendered_scores + return html_content def generate(self) -> None: """Generate the HTML report. @@ -215,7 +189,9 @@ def generate(self) -> None: """ from rich.console import Console from rich.progress import track + from rich.style import Style from rich.table import Table + from rich.text import Text self.TEMPLATE = self.TEMPLATE.replace( "[sasts]", ", ".join(sast_name for sast_name in self.result.sast_names) @@ -224,24 +200,38 @@ def generate(self) -> None: home_page = Console(record=True, file=io.StringIO()) main_table = Table(title="") + main_table.add_column("Score", justify="center") main_table.add_column("Files") - for key in list(self.report_data["files"].values())[0]["score"].keys(): - main_table.add_column( - key.replace("_", " ").title(), justify="center", no_wrap=True - ) - for file_data in track( - self.report_data["files"].values(), + for defect_file in track( + self.report_data.values(), description="Generating report for source file with defects...", ): - file_report_redirect, rendered_scores = self.generate_single_defect( - file_data + html_content = self.generate_single_defect(defect_file) + file_report_name = ( + f"{sha256(defect_file['source_path'].encode()).hexdigest()}.html" + ) + file_report_redirect = Text( + str( + Path(defect_file["source_path"]).relative_to( + self.result.source_path + ) # ty:ignore[no-matching-overload] + ), + style=Style(link=file_report_name), + ) + + report_file = self.report_dir / file_report_name + report_file.write_text(html_content) + + main_table.add_row( + Text(f"{defect_file['score']:.2f}"), file_report_redirect ) - main_table.add_row(file_report_redirect, *rendered_scores) home_page.print(main_table) html_content = home_page.export_html(code_format=self.TEMPLATE) - html_content = html_content.replace("[name]", f"Project: {self.project}") + html_content = html_content.replace( + "[name]", f"Project: {self.result.source_path}" + ) report_home_file = self.report_dir / "home.html" report_home_file.write_text(html_content) diff --git a/pyproject.toml b/pyproject.toml index eb5fc40..5c931d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "CodeSecTools" -version = "0.15.0" +version = "0.15.1" description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation." readme = "README.md" license = "AGPL-3.0-only" diff --git a/uv.lock b/uv.lock index fcae0e5..2913e35 100644 --- a/uv.lock +++ b/uv.lock @@ -239,7 +239,7 @@ wheels = [ [[package]] name = "codesectools" -version = "0.15.0" +version = "0.15.1" source = { editable = "." } dependencies = [ { name = "gitpython" },