# Source code for firecrown.fctools.coverage_to_tsv

#!/usr/bin/env python
"""Convert pytest-cov JSON coverage data to TSV format.

This script reads a coverage.json file generated by pytest-cov and extracts
key information into a tab-separated values (TSV) file for easy analysis.
"""

import json
import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, NamedTuple

import typer
from rich.console import Console

if TYPE_CHECKING:
    from .common import load_json_file
else:
    try:
        from .common import load_json_file
    except ImportError:  # pragma: no cover
        from common import load_json_file



# [docs]
class CoverageRecord(NamedTuple):
    """Structure for holding coverage data for a single function.

    One record is built by ``extract_coverage_data`` for every entry of a
    file's ``"functions"`` mapping in the coverage JSON. ``write_tsv_file``
    emits the fields, in the order declared here, as the columns of one TSV
    row.
    """

    # Key of the file entry under "files" in the coverage JSON.
    file_path: str
    # Key of the function entry under that file's "functions" mapping.
    function_name: str

    # The following values are read from the function's "summary" mapping;
    # each one falls back to 0 / 0.0 / "0" when the key is absent.
    covered_lines: int
    # Read from the summary key "num_statements".
    total_statements: int
    percent_covered: float
    missing_lines: int
    excluded_lines: int
    num_branches: int
    covered_branches: int
    missing_branches: int
    num_partial_branches: int
    # Pre-formatted percentage string taken verbatim from the summary.
    percent_covered_display: str

    # File-level values read from the enclosing file's "summary" mapping
    # ("num_statements", "covered_lines", "percent_covered"); repeated on
    # every record that belongs to the same file.
    file_total_statements: int
    file_covered_lines: int
    file_percent_covered: float

    # Duration of the best-matching test, or None when no timing data was
    # supplied or no test scored above the matching threshold.
    test_duration: float | None = None



def _load_json_timing(path: Path) -> dict[str, float]:
    """Load timing data from JSON file (pytest-json-report format).

    The file is expected to contain a JSON object whose ``"tests"`` entry is
    a list of objects carrying ``"nodeid"`` and ``"duration"`` keys. Content
    that is valid JSON but does not have this shape (for example ``null``, a
    bare number, or list entries that are not objects) is ignored instead of
    raising, so such a file simply yields no timings.

    Args:
        path: Location of the JSON timing file.

    Returns:
        Dictionary mapping each test's node id to its duration in seconds.
        A missing node id is stored under ``""``; a missing, null or
        non-numeric duration is stored as ``0.0``.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # Guard against valid JSON of the wrong shape: only an object with a
    # list under "tests" can contain per-test records.
    tests = data.get("tests") if isinstance(data, dict) else None
    if not isinstance(tests, list):
        return {}

    timings: dict[str, float] = {}
    for test in tests:
        if not isinstance(test, dict):
            # Skip stray scalars/lists rather than failing on ``.get``.
            continue
        test_name = test.get("nodeid", "")
        try:
            # ``or 0.0`` turns an explicit null duration into zero.
            timings[test_name] = float(test.get("duration") or 0.0)
        except (TypeError, ValueError):
            timings[test_name] = 0.0
    return timings


def _parse_duration_line(line: str) -> tuple[str, float] | None:
    """Parse a single duration line from pytest --durations output."""
    duration_pattern = r"(\d+\.?\d*s)\s+(call|setup|teardown)?\s*(.+)"
    m = re.search(duration_pattern, line.strip())
    if not m:
        return None
    duration_str = m.group(1).rstrip("s")
    test_name = m.group(3)
    try:
        duration = float(duration_str)
    except ValueError:  # pragma: no cover
        # Defensive: regex pattern ensures duration_str is always a valid float
        return None
    return test_name, duration


def _load_text_durations(path: Path) -> dict[str, float]:
    """Collect per-test durations from a pytest ``--durations`` text report.

    Every line of the file is handed to ``_parse_duration_line``; lines it
    rejects are skipped. When the same test name occurs on several lines,
    the individual durations are added together.

    Args:
        path: Location of the text file to read (decoded as UTF-8).

    Returns:
        Dictionary mapping test names to their summed duration in seconds.
    """
    totals: dict[str, float] = {}
    with open(path, encoding="utf-8") as handle:
        for raw_line in handle:
            if (entry := _parse_duration_line(raw_line)) is not None:
                name, seconds = entry
                # Accumulate so repeated entries for one test add up.
                totals[name] = totals.get(name, 0.0) + seconds
    return totals



# [docs]
def parse_timing_data(console: Console, timing_file: Path | None) -> dict[str, float]:
    """Parse timing data from pytest --durations output or JSON file.

    The file is first read as JSON (pytest-json-report format). Only when it
    is not valid JSON is it re-read as plain-text ``--durations`` output.
    Failures to read or decode the file are reported on ``console`` as a
    warning and produce an empty result instead of an exception.

    Args:
        console: The rich console object.
        timing_file: Path to timing data file (JSON or text output)

    Returns:
        Dictionary mapping test names to duration in seconds. Empty when no
        file was given, the file does not exist, or it could not be read.
    """
    if timing_file is None or not timing_file.exists():
        return {}

    try:
        return _load_json_timing(timing_file)
    except json.JSONDecodeError:
        # Not JSON: fall through to the plain-text parser below.
        pass
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError covers files that are not valid UTF-8; the text
        # parser would fail the same way, so give up here.
        console.print(
            f"[yellow]Warning: Could not read timing file {timing_file}[/yellow]"
        )
        return {}

    try:
        return _load_text_durations(timing_file)
    except (OSError, UnicodeDecodeError):
        msg = f"Warning: Could not parse timing data from {timing_file}"
        console.print(f"[yellow]{msg}[/yellow]")
        return {}




# [docs]
def match_test_to_function(test_name: str, function_name: str, file_path: str) -> float:
    """Try to match a test name to a function for timing correlation.

    The score is the sum of three independent heuristics, capped at 1.0:

    * 0.3 when the test file and the source file look related,
    * 0.5 when the function name occurs in the test method name,
    * 0.4 when the test method name (without its ``test_`` prefix) occurs in
      the function name.

    Empty strings never count as a match. Without that rule every substring
    test would trivially succeed, e.g. for test identifiers that have no
    ``::`` part or for a function entry whose name is ``""``.

    Args:
        test_name: Full test identifier (like "tests/test_mod.py::test_method")
        function_name: Function name from coverage data
        file_path: File path from coverage data

    Returns:
        Relevance score (0.0 to 1.0) for the match
    """
    # Extract the base test file and method name
    if "::" in test_name:
        parts = test_name.split("::")
        test_file, test_method = parts[0], parts[-1]
    else:
        test_file, test_method = test_name, ""

    score = 0.0

    # Check if test file corresponds to the source file
    stripped_test_file = test_file.replace("tests/", "").replace("test_", "")
    if (file_path and file_path in test_file) or (
        stripped_test_file and stripped_test_file in file_path
    ):
        score += 0.3

    function_lower = function_name.lower()

    # Check if function name appears in test method name
    if function_lower and function_lower in test_method.lower():
        score += 0.5

    # Check if test method name appears in function name. Only the leading
    # "test_" is dropped: removing every occurrence would mangle names such
    # as "test_latest_value" ("la" + "test_" + "value") into "lavalue".
    method_core = test_method.removeprefix("test_").lower()
    if method_core and method_core in function_lower:
        score += 0.4

    return min(score, 1.0)




# [docs]
def extract_coverage_data(
    coverage_data: dict[str, Any], timing_data: dict[str, float] | None = None
) -> list[CoverageRecord]:
    """Build one coverage record per function found in the coverage JSON.

    File entries without a ``"functions"`` mapping are skipped. When timing
    data is supplied, each function receives the duration of the test that
    ``match_test_to_function`` scores highest for it, provided that score is
    above 0.3; on equal scores the test seen first wins.

    Args:
        coverage_data: The loaded JSON coverage data
        timing_data: Optional dictionary of test timing data

    Returns:
        List of CoverageRecord objects containing detailed coverage information
    """
    # pylint: disable=too-many-locals
    timings = timing_data or {}
    records: list[CoverageRecord] = []

    for source_path, file_entry in coverage_data.get("files", {}).items():
        if "functions" not in file_entry:
            continue

        # File-level totals are shared by every record of this file.
        file_stats = file_entry.get("summary", {})
        statements_in_file = file_stats.get("num_statements", 0)
        covered_in_file = file_stats.get("covered_lines", 0)
        percent_in_file = file_stats.get("percent_covered", 0.0)

        for func_name, func_entry in file_entry["functions"].items():
            stats = func_entry.get("summary", {})

            # A candidate must beat the current bar, which starts at the
            # minimum acceptable score and rises with every accepted match.
            matched_duration = None
            bar = 0.3
            for test_id, seconds in timings.items():
                relevance = match_test_to_function(test_id, func_name, source_path)
                if relevance > bar:
                    bar, matched_duration = relevance, seconds

            records.append(
                CoverageRecord(
                    file_path=source_path,
                    function_name=func_name,
                    covered_lines=stats.get("covered_lines", 0),
                    total_statements=stats.get("num_statements", 0),
                    percent_covered=stats.get("percent_covered", 0.0),
                    missing_lines=stats.get("missing_lines", 0),
                    excluded_lines=stats.get("excluded_lines", 0),
                    num_branches=stats.get("num_branches", 0),
                    covered_branches=stats.get("covered_branches", 0),
                    missing_branches=stats.get("missing_branches", 0),
                    num_partial_branches=stats.get("num_partial_branches", 0),
                    percent_covered_display=stats.get("percent_covered_display", "0"),
                    file_total_statements=statements_in_file,
                    file_covered_lines=covered_in_file,
                    file_percent_covered=percent_in_file,
                    test_duration=matched_duration,
                )
            )

    return records




# [docs]
def write_tsv_file(data: list[CoverageRecord], output_file: Path) -> None:
    """Serialise coverage records as tab-separated text.

    The file starts with a header line naming the columns, followed by one
    line per record. A record without a test duration gets an empty last
    column. Any existing file at ``output_file`` is overwritten.

    Args:
        data: List of CoverageRecord objects with coverage data
        output_file: Path to output TSV file
    """
    column_names = (
        "file_path",
        "function_name",
        "covered_lines",
        "total_statements",
        "percent_covered",
        "missing_lines",
        "excluded_lines",
        "num_branches",
        "covered_branches",
        "missing_branches",
        "num_partial_branches",
        "percent_covered_display",
        "file_total_statements",
        "file_covered_lines",
        "file_percent_covered",
        "test_duration_seconds",
    )

    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write("\t".join(column_names) + "\n")

        for rec in data:
            # Numeric function-level columns that precede the display string.
            function_numbers = (
                rec.covered_lines,
                rec.total_statements,
                rec.percent_covered,
                rec.missing_lines,
                rec.excluded_lines,
                rec.num_branches,
                rec.covered_branches,
                rec.missing_branches,
                rec.num_partial_branches,
            )
            # Numeric file-level columns that follow the display string.
            file_numbers = (
                rec.file_total_statements,
                rec.file_covered_lines,
                rec.file_percent_covered,
            )
            seconds = "" if rec.test_duration is None else str(rec.test_duration)

            cells = [
                rec.file_path,
                rec.function_name,
                *map(str, function_numbers),
                rec.percent_covered_display,
                *map(str, file_numbers),
                seconds,
            ]
            handle.write("\t".join(cells) + "\n")



# Typer application object; `main` is registered on it with `@app.command()`
# and it is invoked when this module is executed as a script.
app = typer.Typer()



# [docs]
@app.command()
def main(
    input_file: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help="Path to the input JSON coverage file",
    ),
    output_file: Path = typer.Argument(
        "coverage_data.tsv",
        help="Path to the output TSV file",
        writable=True,
        resolve_path=True,
    ),
    timing: Path = typer.Option(
        None,
        "--timing",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help=(
            "Optional path to timing data file "
            "(JSON from pytest-json-report or text from pytest --durations)"
        ),
    ),
) -> None:
    r"""Convert pytest-cov JSON coverage data to TSV format.

    This tool reads a JSON file containing pytest-cov coverage data and
    converts it to a tab-separated values (TSV) format. The output includes
    details about file coverage, function coverage, and missing line
    information.

    Examples:
    coverage_to_tsv.py coverage.json

    coverage_to_tsv.py coverage.json output.tsv

    coverage_to_tsv.py coverage.json output.tsv --timing timing.txt

    coverage_to_tsv.py /path/to/coverage.json /path/to/output.tsv \\
    --timing timing.json
    """
    console = Console()
    try:
        # Step 1: read the coverage report.
        console.print(f"Reading coverage data from [cyan]{input_file}[/cyan]...")
        coverage_data = load_json_file(console, input_file, "coverage data")

        # Step 2: read test timings, but only when --timing was given.
        timing_data = None
        if timing:
            console.print(f"Reading timing data from [cyan]{timing}[/cyan]...")
            timing_data = parse_timing_data(console, timing)
            console.print(f"Loaded timing data for {len(timing_data)} tests")

        # Step 3: flatten the report into per-function records.
        console.print("Extracting function-level coverage data...")
        records = extract_coverage_data(coverage_data, timing_data)
        record_count = len(records)

        # Step 4: write the records out.
        console.print(
            f"Writing {record_count} records to [cyan]{output_file}[/cyan]..."
        )
        write_tsv_file(records, output_file)

        # Step 5: report what was done.
        console.print(
            "[bold green]Successfully converted coverage data to TSV format![/bold green]"
        )
        console.print(f"Output file: [cyan]{output_file}[/cyan]")
        console.print(f"Records written: {record_count}")

        # The timing statistic is only shown when at least one timing was loaded.
        if timing_data:
            timed_count = sum(rec.test_duration is not None for rec in records)
            console.print(f"Records with timing data: {timed_count}")

    except OSError as e:  # pragma: no cover
        # Any file-system failure that reaches this point ends the run with
        # exit status 1.
        console.print(f"[bold red]Error: File operation failed: {e}[/bold red]")
        sys.exit(1)
    except KeyError as e:  # pragma: no cover
        # A required key was absent from the loaded JSON; exit with status 1.
        console.print(
            f"[bold red]Error: Missing expected key in JSON data: {e}[/bold red]"
        )
        sys.exit(1)



# Run the Typer application only when the module is executed as a script,
# not when it is imported.
if __name__ == "__main__":  # pragma: no cover
    app()