#!/usr/bin/env python
"""Convert pytest-cov JSON coverage data to TSV format.
This script reads a coverage.json file generated by pytest-cov and extracts
key information into a tab-separated values (TSV) file for easy analysis.
"""
import json
import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, NamedTuple
import typer
from rich.console import Console
if TYPE_CHECKING:
from .common import load_json_file
else:
try:
from .common import load_json_file
except ImportError: # pragma: no cover
from common import load_json_file
[docs]
class CoverageRecord(NamedTuple):
"""Structure for holding coverage data for a single function."""
file_path: str
function_name: str
covered_lines: int
total_statements: int
percent_covered: float
missing_lines: int
excluded_lines: int
num_branches: int
covered_branches: int
missing_branches: int
num_partial_branches: int
percent_covered_display: str
file_total_statements: int
file_covered_lines: int
file_percent_covered: float
test_duration: float | None = None
def _load_json_timing(path: Path) -> dict[str, float]:
    """Load per-test durations from a JSON report (pytest-json-report format).

    Only the top-level ``"tests"`` list is consulted; each entry contributes
    its ``"nodeid"`` as the key and its ``"duration"`` as the value.

    Args:
        path: Location of the JSON report.

    Returns:
        Mapping of test node id to duration in seconds. Empty when the
        document is valid JSON but has no usable ``"tests"`` list.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
        OSError: If the file cannot be opened or read.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # Valid JSON is not necessarily an object with a list under "tests":
    # a bare number, an array, or `"tests": null` would otherwise raise
    # TypeError/AttributeError below, which callers do not expect.
    tests = data.get("tests") if isinstance(data, dict) else None
    if not isinstance(tests, list):
        return {}

    timings: dict[str, float] = {}
    for test in tests:
        if not isinstance(test, dict):
            continue  # skip malformed entries instead of aborting the load
        test_name = test.get("nodeid", "")
        if not isinstance(test_name, str):
            continue  # keys must be strings (and hashable)
        try:
            # `or 0.0` maps a null/absent duration to zero
            timings[test_name] = float(test.get("duration") or 0.0)
        except (TypeError, ValueError):
            timings[test_name] = 0.0
    return timings
def _parse_duration_line(line: str) -> tuple[str, float] | None:
"""Parse a single duration line from pytest --durations output."""
duration_pattern = r"(\d+\.?\d*s)\s+(call|setup|teardown)?\s*(.+)"
m = re.search(duration_pattern, line.strip())
if not m:
return None
duration_str = m.group(1).rstrip("s")
test_name = m.group(3)
try:
duration = float(duration_str)
except ValueError: # pragma: no cover
# Defensive: regex pattern ensures duration_str is always a valid float
return None
return test_name, duration
def _load_text_durations(path: Path) -> dict[str, float]:
    """Collect per-test durations from a text file of pytest --durations output.

    Every line is handed to ``_parse_duration_line``; lines it rejects are
    ignored. When a test name occurs on several lines, the durations are
    added together.

    Args:
        path: Text file to read (decoded as UTF-8).

    Returns:
        Mapping of test name to summed duration in seconds.
    """
    totals: dict[str, float] = {}
    with open(path, encoding="utf-8") as handle:
        for raw_line in handle:
            if (entry := _parse_duration_line(raw_line)) is not None:
                name, seconds = entry
                # Accumulate rather than overwrite repeated names
                totals[name] = totals.get(name, 0.0) + seconds
    return totals
[docs]
def parse_timing_data(console: Console, timing_file: Path | None) -> dict[str, float]:
    """Parse timing data from pytest --durations output or JSON file.

    The file is first read as JSON. If it is not valid JSON it is read again
    as plain-text ``pytest --durations`` output. Read failures are reported
    on ``console`` as warnings rather than raised.

    Args:
        console: The rich console object used for warnings.
        timing_file: Path to timing data file (JSON or text output), or
            ``None`` when no timing data was supplied.

    Returns:
        Dictionary mapping test names to duration in seconds. Empty when
        ``timing_file`` is ``None``, does not exist, or cannot be read.
    """
    if timing_file is None or not timing_file.exists():
        return {}

    try:
        return _load_json_timing(timing_file)
    except json.JSONDecodeError:
        # Not JSON: fall through to the plain-text parser below
        pass
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError: the file is not UTF-8 text, so neither parser
        # can read it; warn instead of crashing with a traceback.
        console.print(
            f"[yellow]Warning: Could not read timing file {timing_file}[/yellow]"
        )
        return {}

    # Try plain-text durations output
    try:
        return _load_text_durations(timing_file)
    except OSError:
        msg = f"Warning: Could not parse timing data from {timing_file}"
        console.print(f"[yellow]{msg}[/yellow]")
        return {}
[docs]
def match_test_to_function(test_name: str, function_name: str, file_path: str) -> float:
    """Try to match a test name to a function for timing correlation.

    The score is the sum of three independent substring heuristics, capped
    at 1.0:

    * +0.3 when the test file and the source file appear to correspond,
    * +0.5 when the function name occurs in the test method name,
    * +0.4 when the test method name (without ``test_``) occurs in the
      function name.

    Empty strings never count as a match. Without that rule, a test id
    lacking ``::``, a method called just ``test_``, or an empty function
    name would satisfy the substring checks trivially and earn a score.

    Args:
        test_name: Full test identifier (like "tests/test_mod.py::test_method")
        function_name: Function name from coverage data
        file_path: File path from coverage data

    Returns:
        Relevance score (0.0 to 1.0) for the match
    """
    # Extract the base test file and method name
    if "::" in test_name:
        parts = test_name.split("::")
        test_file, test_method = parts[0], parts[-1]
    else:
        test_file, test_method = test_name, ""

    score = 0.0

    # Check if test file corresponds to the source file
    test_file_stem = test_file.replace("tests/", "").replace("test_", "")
    if (file_path and file_path in test_file) or (
        test_file_stem and test_file_stem in file_path
    ):
        score += 0.3

    function_lower = function_name.lower()

    # Check if function name appears in test method name
    if function_lower and function_lower in test_method.lower():
        score += 0.5

    # Check if test method name appears in function name
    method_stem = test_method.replace("test_", "").lower()
    if method_stem and method_stem in function_lower:
        score += 0.4

    return min(score, 1.0)
[docs]
def write_tsv_file(data: list[CoverageRecord], output_file: Path) -> None:
    """Dump coverage records to ``output_file`` as tab-separated text.

    The file is opened for writing as UTF-8. A header row is written first,
    followed by one row per record in the order given. A record whose
    ``test_duration`` is ``None`` gets an empty final cell.

    Args:
        data: List of CoverageRecord objects with coverage data
        output_file: Path to output TSV file
    """
    column_names = (
        "file_path",
        "function_name",
        "covered_lines",
        "total_statements",
        "percent_covered",
        "missing_lines",
        "excluded_lines",
        "num_branches",
        "covered_branches",
        "missing_branches",
        "num_partial_branches",
        "percent_covered_display",
        "file_total_statements",
        "file_covered_lines",
        "file_percent_covered",
        "test_duration_seconds",
    )

    with open(output_file, "w", encoding="utf-8") as tsv:
        tsv.write("\t".join(column_names) + "\n")

        for rec in data:
            if rec.test_duration is None:
                duration_cell = ""
            else:
                duration_cell = str(rec.test_duration)

            # Numeric columns are stringified in bulk; the three text
            # columns are written as they are.
            function_figures = map(
                str,
                (
                    rec.covered_lines,
                    rec.total_statements,
                    rec.percent_covered,
                    rec.missing_lines,
                    rec.excluded_lines,
                    rec.num_branches,
                    rec.covered_branches,
                    rec.missing_branches,
                    rec.num_partial_branches,
                ),
            )
            file_figures = map(
                str,
                (
                    rec.file_total_statements,
                    rec.file_covered_lines,
                    rec.file_percent_covered,
                ),
            )
            cells = [
                rec.file_path,
                rec.function_name,
                *function_figures,
                rec.percent_covered_display,
                *file_figures,
                duration_cell,
            ]
            tsv.write("\t".join(cells) + "\n")
# Typer application object; `main` is registered on it via `@app.command()`
# and it is invoked by the `__main__` guard at the end of the module.
app = typer.Typer()
[docs]
@app.command()
def main(
    input_file: Path = typer.Argument(
        ...,
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help="Path to the input JSON coverage file",
    ),
    output_file: Path = typer.Argument(
        "coverage_data.tsv",
        help="Path to the output TSV file",
        writable=True,
        resolve_path=True,
    ),
    timing: Path = typer.Option(
        None,
        "--timing",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
        resolve_path=True,
        help=(
            "Optional path to timing data file "
            "(JSON from pytest-json-report or text from pytest --durations)"
        ),
    ),
) -> None:
    r"""Convert pytest-cov JSON coverage data to TSV format.

    This tool reads a JSON file containing pytest-cov coverage data and
    converts it to a tab-separated values (TSV) format. The output includes
    details about file coverage, function coverage, and missing line
    information.

    Examples:
        coverage_to_tsv.py coverage.json
        coverage_to_tsv.py coverage.json output.tsv
        coverage_to_tsv.py coverage.json output.tsv --timing timing.txt
        coverage_to_tsv.py /path/to/coverage.json /path/to/output.tsv \
            --timing timing.json
    """
    # NOTE: the docstring above doubles as the command's --help text, so it
    # deliberately carries no Args section. It is a raw string, hence the
    # single backslash for the shell line continuation in the last example.
    console = Console()
    try:
        # Load JSON data
        console.print(f"Reading coverage data from [cyan]{input_file}[/cyan]...")
        coverage_data = load_json_file(console, input_file, "coverage data")

        # Load timing data if provided; stays None when --timing is omitted
        timing_data = None
        if timing:
            console.print(f"Reading timing data from [cyan]{timing}[/cyan]...")
            timing_data = parse_timing_data(console, timing)
            console.print(f"Loaded timing data for {len(timing_data)} tests")

        # Extract coverage data
        console.print("Extracting function-level coverage data...")
        extracted_data = extract_coverage_data(coverage_data, timing_data)

        # Write TSV file
        console.print(
            f"Writing {len(extracted_data)} records to [cyan]{output_file}[/cyan]..."
        )
        write_tsv_file(extracted_data, output_file)

        # Summary
        msg = "Successfully converted coverage data to TSV format!"
        console.print(f"[bold green]{msg}[/bold green]")
        console.print(f"Output file: [cyan]{output_file}[/cyan]")
        console.print(f"Records written: {len(extracted_data)}")
        if timing_data:
            # Only reported when at least one timing entry was loaded
            records_with_timing = sum(
                1 for record in extracted_data if record.test_duration is not None
            )
            console.print(f"Records with timing data: {records_with_timing}")
    except OSError as e:  # pragma: no cover
        # Defensive: load_json_file and write_tsv_file handle errors via cli_error
        console.print(f"[bold red]Error: File operation failed: {e}[/bold red]")
        sys.exit(1)
    except KeyError as e:  # pragma: no cover
        # Defensive: extract_coverage_data uses .get() to avoid KeyError
        console.print(
            f"[bold red]Error: Missing expected key in JSON data: {e}[/bold red]"
        )
        sys.exit(1)
# Run the Typer app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__": # pragma: no cover
    app()