Source code for firecrown.fctools.coverage_to_tsv

#!/usr/bin/env python
"""Convert pytest-cov JSON coverage data to TSV format.

This script reads a coverage.json file generated by pytest-cov and extracts
key information into a tab-separated values (TSV) file for easy analysis.
"""

import json
import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, NamedTuple

import typer
from rich.console import Console

if TYPE_CHECKING:
    from .common import load_json_file
else:
    try:
        from .common import load_json_file
    except ImportError:  # pragma: no cover
        from common import load_json_file


class CoverageRecord(NamedTuple):
    """Structure for holding coverage data for a single function.

    One record becomes one row of the TSV output. Field order matters: it is
    the positional order of the tuple and mirrors the TSV column order.
    """

    # Identification of the function this record describes.
    file_path: str  # key of the enclosing file in the coverage data
    function_name: str  # key of the function within that file

    # Function-level values, taken from the function's "summary" mapping.
    covered_lines: int
    total_statements: int  # read from the summary key "num_statements"
    percent_covered: float
    missing_lines: int
    excluded_lines: int
    num_branches: int
    covered_branches: int
    missing_branches: int
    num_partial_branches: int
    percent_covered_display: str  # written to the TSV as-is (not converted)

    # File-level values, repeated on every record of the same file.
    file_total_statements: int
    file_covered_lines: int
    file_percent_covered: float

    # Duration of the best-matching test, or None when no timing was matched.
    test_duration: float | None = None
def _load_json_timing(path: Path) -> dict[str, float]: """Load timing data from JSON file (pytest-json-report format).""" with open(path, encoding="utf-8") as f: data = json.load(f) timings: dict[str, float] = {} if "tests" in data: for test in data["tests"]: test_name = test.get("nodeid", "") duration = test.get("duration", 0.0) try: timings[test_name] = float(duration or 0.0) except (TypeError, ValueError): timings[test_name] = 0.0 return timings def _parse_duration_line(line: str) -> tuple[str, float] | None: """Parse a single duration line from pytest --durations output.""" duration_pattern = r"(\d+\.?\d*s)\s+(call|setup|teardown)?\s*(.+)" m = re.search(duration_pattern, line.strip()) if not m: return None duration_str = m.group(1).rstrip("s") test_name = m.group(3) try: duration = float(duration_str) except ValueError: # pragma: no cover # Defensive: regex pattern ensures duration_str is always a valid float return None return test_name, duration def _load_text_durations(path: Path) -> dict[str, float]: """Load timing data from text file (pytest --durations output).""" timings: dict[str, float] = {} with open(path, encoding="utf-8") as f: for line in f: parsed = _parse_duration_line(line) if parsed is None: continue test_name, duration = parsed timings[test_name] = timings.get(test_name, 0.0) + duration return timings
def parse_timing_data(console: Console, timing_file: Path | None) -> dict[str, float]:
    """Parse timing data from pytest --durations output or JSON file.

    The file is first read as JSON. If it is not valid JSON it is read again
    as plain-text ``--durations`` output. Failures are reported as warnings
    on ``console`` and yield an empty result instead of raising.

    Args:
        console: The rich console object used for warnings.
        timing_file: Path to timing data file (JSON or text output)

    Returns:
        Dictionary mapping test names to duration in seconds. Empty when no
        file is given, the file does not exist, or it cannot be read/decoded.
    """
    if timing_file is None or not timing_file.exists():
        return {}

    parse_warning = f"Warning: Could not parse timing data from {timing_file}"

    try:
        return _load_json_timing(timing_file)
    except json.JSONDecodeError:
        # Not JSON: fall through to the plain-text durations format below.
        pass
    except UnicodeDecodeError:
        # Not UTF-8 text at all, so the plain-text reader would fail as well.
        console.print(f"[yellow]{parse_warning}[/yellow]")
        return {}
    except OSError:
        console.print(
            f"[yellow]Warning: Could not read timing file {timing_file}[/yellow]"
        )
        return {}

    try:
        return _load_text_durations(timing_file)
    except (OSError, UnicodeDecodeError):
        console.print(f"[yellow]{parse_warning}[/yellow]")
        return {}
def match_test_to_function(
    test_name: str, function_name: str, file_path: str
) -> float:
    """Try to match a test name to a function for timing correlation.

    The score is the sum of three heuristics, capped at 1.0:

    * +0.3 when the test file corresponds to the source file,
    * +0.5 when the function name appears in the test method name,
    * +0.4 when the test method name (without ``test_``) appears in the
      function name.

    All comparisons are substring checks. A rule is skipped when the string
    being searched for is empty, because the empty string is a substring of
    everything and would otherwise match every candidate (for instance a test
    id without ``::``, or an empty function name).

    Args:
        test_name: Full test identifier (like "tests/test_mod.py::test_method")
        function_name: Function name from coverage data
        file_path: File path from coverage data

    Returns:
        Relevance score (0.0 to 1.0) for the match
    """
    # Split "<file>::...::<method>" into its first and last components.
    test_file, _, remainder = test_name.partition("::")
    test_method = remainder.rpartition("::")[2]

    score = 0.0

    # Check if test file corresponds to the source file
    test_file_stem = test_file.replace("tests/", "").replace("test_", "")
    if (file_path and file_path in test_file) or (
        test_file_stem and test_file_stem in file_path
    ):
        score += 0.3

    # Check if function name appears in test method name
    if function_name and function_name.lower() in test_method.lower():
        score += 0.5

    # Check if test method name appears in function name
    method_stem = test_method.replace("test_", "").lower()
    if method_stem and method_stem in function_name.lower():
        score += 0.4

    return min(score, 1.0)
def extract_coverage_data(
    coverage_data: dict[str, Any], timing_data: dict[str, float] | None = None
) -> list[CoverageRecord]:
    """Build one CoverageRecord per function found in the coverage JSON.

    Files without a "functions" section are skipped. When timing data is
    supplied, each function is paired with the duration of the test whose
    name matches it best, provided that match scores above 0.3.

    Args:
        coverage_data: The loaded JSON coverage data
        timing_data: Optional dictionary of test timing data

    Returns:
        List of CoverageRecord objects containing detailed coverage information
    """
    # pylint: disable=too-many-locals
    timings = timing_data if timing_data else {}
    records = []

    for source_path, per_file in coverage_data.get("files", {}).items():
        if "functions" not in per_file:
            continue

        # File-level totals are repeated on every record of this file.
        file_stats = per_file.get("summary", {})
        file_level = {
            "file_total_statements": file_stats.get("num_statements", 0),
            "file_covered_lines": file_stats.get("covered_lines", 0),
            "file_percent_covered": file_stats.get("percent_covered", 0.0),
        }

        for func_name, func_entry in per_file["functions"].items():
            stats = func_entry.get("summary", {})

            # Pick the first test with the highest score; it only counts as a
            # match when that score clears the minimum threshold.
            matched_duration = None
            if timings:
                scored = [
                    (match_test_to_function(name, func_name, source_path), secs)
                    for name, secs in timings.items()
                ]
                top_score, top_secs = max(scored, key=lambda pair: pair[0])
                if top_score > 0.3:
                    matched_duration = top_secs

            records.append(
                CoverageRecord(
                    file_path=source_path,
                    function_name=func_name,
                    covered_lines=stats.get("covered_lines", 0),
                    total_statements=stats.get("num_statements", 0),
                    percent_covered=stats.get("percent_covered", 0.0),
                    missing_lines=stats.get("missing_lines", 0),
                    excluded_lines=stats.get("excluded_lines", 0),
                    num_branches=stats.get("num_branches", 0),
                    covered_branches=stats.get("covered_branches", 0),
                    missing_branches=stats.get("missing_branches", 0),
                    num_partial_branches=stats.get("num_partial_branches", 0),
                    percent_covered_display=stats.get("percent_covered_display", "0"),
                    test_duration=matched_duration,
                    **file_level,
                )
            )

    return records
def write_tsv_file(data: list[CoverageRecord], output_file: Path) -> None:
    """Serialise coverage records as tab-separated text.

    The file starts with a header row, followed by one row per record. A
    record without a test duration gets an empty last column.

    Args:
        data: List of CoverageRecord objects with coverage data
        output_file: Path to output TSV file
    """
    column_names = (
        "file_path",
        "function_name",
        "covered_lines",
        "total_statements",
        "percent_covered",
        "missing_lines",
        "excluded_lines",
        "num_branches",
        "covered_branches",
        "missing_branches",
        "num_partial_branches",
        "percent_covered_display",
        "file_total_statements",
        "file_covered_lines",
        "file_percent_covered",
        "test_duration_seconds",
    )

    def cells(rec: CoverageRecord) -> tuple[str, ...]:
        """Return the cell texts of one record, in column order."""
        timing_cell = "" if rec.test_duration is None else str(rec.test_duration)
        return (
            rec.file_path,
            rec.function_name,
            str(rec.covered_lines),
            str(rec.total_statements),
            str(rec.percent_covered),
            str(rec.missing_lines),
            str(rec.excluded_lines),
            str(rec.num_branches),
            str(rec.covered_branches),
            str(rec.missing_branches),
            str(rec.num_partial_branches),
            rec.percent_covered_display,
            str(rec.file_total_statements),
            str(rec.file_covered_lines),
            str(rec.file_percent_covered),
            timing_cell,
        )

    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write("\t".join(column_names) + "\n")
        for rec in data:
            handle.write("\t".join(cells(rec)) + "\n")
# Typer application object; the command-line entry point is registered on it
# through the ``@app.command()`` decorator.
app = typer.Typer()
# NOTE: the docstring of this command is left untouched on purpose; Typer
# presumably surfaces it as the command's help text, so it is runtime output.
@app.command()
def main(
    # Positional argument: the coverage JSON to read (must be an existing file).
    input_file: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help="Path to the input JSON coverage file",
    ),
    # Positional argument: where the TSV is written; has a default file name.
    output_file: Path = typer.Argument(
        "coverage_data.tsv",
        help="Path to the output TSV file",
        writable=True,
        resolve_path=True,
    ),
    # NOTE(review): annotated as ``Path`` but the default is ``None``; the body
    # treats a falsy value as "no timing file given" -- confirm whether the
    # annotation should be optional.
    timing: Path = typer.Option(
        None,
        "--timing",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help=(
            "Optional path to timing data file "
            "(JSON from pytest-json-report or text from pytest --durations)"
        ),
    ),
) -> None:
    r"""Convert pytest-cov JSON coverage data to TSV format.

    This tool reads a JSON file containing pytest-cov coverage data and converts
    it to a tab-separated values (TSV) format. The output includes details about
    file coverage, function coverage, and missing line information.

    Examples:
        coverage_to_tsv.py coverage.json

        coverage_to_tsv.py coverage.json output.tsv

        coverage_to_tsv.py coverage.json output.tsv --timing timing.txt

        coverage_to_tsv.py /path/to/coverage.json /path/to/output.tsv \\
            --timing timing.json
    """
    console = Console()

    try:
        # Load JSON data
        console.print(f"Reading coverage data from [cyan]{input_file}[/cyan]...")
        coverage_data = load_json_file(console, input_file, "coverage data")

        # Load timing data if provided; stays None when --timing was not given
        timing_data = None
        if timing:
            console.print(f"Reading timing data from [cyan]{timing}[/cyan]...")
            timing_data = parse_timing_data(console, timing)
            console.print(f"Loaded timing data for {len(timing_data)} tests")

        # Extract coverage data
        console.print("Extracting function-level coverage data...")
        extracted_data = extract_coverage_data(coverage_data, timing_data)

        # Write TSV file
        console.print(
            f"Writing {len(extracted_data)} records to [cyan]{output_file}[/cyan]..."
        )
        write_tsv_file(extracted_data, output_file)

        # Summary of what was produced
        msg = "Successfully converted coverage data to TSV format!"
        console.print(f"[bold green]{msg}[/bold green]")
        console.print(f"Output file: [cyan]{output_file}[/cyan]")
        console.print(f"Records written: {len(extracted_data)}")

        # Only reported when timing data was loaded and is non-empty
        if timing_data:
            records_with_timing = sum(
                1 for record in extracted_data if record.test_duration is not None
            )
            console.print(f"Records with timing data: {records_with_timing}")

    except OSError as e:  # pragma: no cover
        # Defensive: load_json_file and write_tsv_file handle errors via cli_error
        console.print(f"[bold red]Error: File operation failed: {e}[/bold red]")
        sys.exit(1)
    except KeyError as e:  # pragma: no cover
        # Defensive: extract_coverage_data uses .get() to avoid KeyError
        console.print(
            f"[bold red]Error: Missing expected key in JSON data: {e}[/bold red]"
        )
        sys.exit(1)
# Run the Typer application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":  # pragma: no cover
    app()