Source code for firecrown.likelihood.factories

"""Factory functions for creating likelihoods from SACC files.

This module provides factory functions to create likelihood objects by combining a SACC
file and a set of statistic factories. Users can define their own custom statistic
factories for advanced use cases or rely on the generic factory functions provided here
for simpler scenarios.

For straightforward contexts where all data in the SACC file is utilized, the generic
factories simplify the process. The user only needs to supply the SACC file and specify
which statistic factories to use, and the likelihood factory will handle the creation of
the likelihood object, assembling the necessary components automatically.

These functions are particularly useful when the full set of statistics present in a
SACC file is being used without the need for complex customization.
"""

from pathlib import Path

import sacc
import yaml
from pydantic import BaseModel
from typing_extensions import assert_never

from firecrown.ccl_factory import CCLFactory
from firecrown.data_functions import (
    TwoPointBinFilterCollection,
    check_two_point_consistence_harmonic,
    check_two_point_consistence_real,
    extract_all_harmonic_data,
    extract_all_real_data,
)
from firecrown.likelihood.gaussian import ConstGaussian
from firecrown.likelihood.likelihood import Likelihood, NamedParameters
from firecrown.likelihood.two_point import TwoPointFactory
from firecrown.metadata_types import TwoPointCorrelationSpace
from firecrown.modeling_tools import ModelingTools


[docs] def load_sacc_data(filepath: str | Path) -> sacc.Sacc: """Load SACC data from a file, auto-detecting the format. Attempts to load the file first as HDF5, then as FITS if HDF5 fails. This allows the function to work with both modern HDF5-based SACC files and legacy FITS-based SACC files. :param filepath: Path to the SACC data file (str or Path object) :return: Loaded SACC data object :raises FileNotFoundError: If the file does not exist :raises ValueError: If the file cannot be read as either HDF5 or FITS SACC data """ # Convert to Path object for consistent handling file_path = Path(filepath) if isinstance(filepath, str) else filepath # Check if file exists if not file_path.exists(): raise FileNotFoundError(f"SACC file not found: {file_path}") # Try HDF5 first (modern format) hdf5_error = None try: return sacc.Sacc.load_hdf5(str(file_path)) except OSError as e: hdf5_error = e # If HDF5 failed, try FITS (legacy format) fits_error = None try: return sacc.Sacc.load_fits(str(file_path)) except OSError as e: fits_error = e # Both formats failed - provide helpful error message raise ValueError( f"Failed to load SACC data from file: {file_path}\n" f"The file could not be read as either HDF5 or FITS format.\n" f"HDF5 error: {hdf5_error}\n" f"FITS error: {fits_error}" )
[docs] class DataSourceSacc(BaseModel): """Model for the data source in a likelihood configuration.""" sacc_data_file: str filters: TwoPointBinFilterCollection | None = None _path: Path | None = None
[docs] def set_path(self, path: Path) -> None: """Set the path for the data source.""" self._path = path
[docs] def get_filepath(self) -> Path: """Return the filename of the data source. Raises a FileNotFoundError if the file does not exist. :return: The filename """ sacc_data_path = Path(self.sacc_data_file) # If sacc_data_file is absolute, use it directly if sacc_data_path.is_absolute() and sacc_data_path.exists(): return Path(self.sacc_data_file) # If path is set, use it to find the file if self._path is not None: full_sacc_data_path = self._path / sacc_data_path if full_sacc_data_path.exists(): return full_sacc_data_path # If path is not set, use the current directory elif sacc_data_path.exists(): return sacc_data_path # If the file does not exist, raise an error raise FileNotFoundError(f"File {sacc_data_path} does not exist")
[docs] def get_sacc_data(self) -> sacc.Sacc: """Load the SACC data file. Uses automatic format detection to load both HDF5 and FITS files. """ filename = self.get_filepath() return load_sacc_data(filename)
[docs] def ensure_path(file: str | Path) -> Path: """Ensure the file path is a Path object.""" match file: case str(): return Path(file) case Path(): return file case _ as unreachable: assert_never(unreachable)
[docs] class TwoPointExperiment(BaseModel): """Model for the two-point experiment in a likelihood configuration.""" two_point_factory: TwoPointFactory data_source: DataSourceSacc ccl_factory: CCLFactory | None = None
[docs] def model_post_init(self, _, /) -> None: """Initialize the TwoPointExperiment object.""" if self.ccl_factory is None: self.ccl_factory = CCLFactory()
[docs] @classmethod def load_from_yaml(cls, file: str | Path) -> "TwoPointExperiment": """Load a TwoPointExperiment object from a YAML file.""" filepath = ensure_path(file) with open(filepath, encoding="utf-8") as f: config = yaml.safe_load(f) tpe = cls.model_validate(config, strict=True) # Record the file directory tpe.data_source.set_path(filepath.parent) return tpe
[docs] def make_likelihood(self) -> Likelihood: """Create a likelihood object for two-point statistics from a SACC file.""" # Load the SACC file sacc_data = self.data_source.get_sacc_data() likelihood: None | Likelihood = None match self.two_point_factory.correlation_space: case TwoPointCorrelationSpace.REAL: likelihood = _build_two_point_likelihood_real( sacc_data, self.two_point_factory, filters=self.data_source.filters ) case TwoPointCorrelationSpace.HARMONIC: likelihood = _build_two_point_likelihood_harmonic( sacc_data, self.two_point_factory, filters=self.data_source.filters ) case _ as unreachable: assert_never(unreachable) assert likelihood is not None return likelihood
[docs] def build_two_point_likelihood( build_parameters: NamedParameters, ) -> tuple[Likelihood, ModelingTools]: """Build a likelihood object for two-point statistics from a SACC file. This function creates a likelihood object for two-point statistics using a SACC file and a set of statistic factories. The user must provide the SACC file and specify which statistic factories to use. The likelihood object is created by combining the SACC file with the specified statistic factories. :param build_parameters: A NamedParameters object containing the following parameters: - sacc_file: The SACC file containing the data. - statistic_factories: A YAML file containing the statistic factories to use. """ likelihood_config_file = build_parameters.get_string("likelihood_config") exp = TwoPointExperiment.load_from_yaml(likelihood_config_file) modeling_tools = ModelingTools(ccl_factory=exp.ccl_factory) likelihood = exp.make_likelihood() return likelihood, modeling_tools
def _build_two_point_likelihood_harmonic( sacc_data: sacc.Sacc, two_point_factory: TwoPointFactory, filters: TwoPointBinFilterCollection | None = None, ): """Build a likelihood object for two-point statistics in harmonic space. This function creates a likelihood object for two-point statistics in harmonic space using a SACC file and a set of statistic factories. The user must provide the SACC file and specify which statistic factories to use. The likelihood object is created by combining the SACC file with the specified statistic factories. :param sacc_data: The SACC file containing the data. :param wl_factory: The weak lensing statistic factory. :param nc_factory: The number counts statistic factory. :return: A likelihood object for two-point statistics in harmonic space. """ tpms = extract_all_harmonic_data(sacc_data) if len(tpms) == 0: raise ValueError( "No two-point measurements in harmonic space found in the SACC file." ) check_two_point_consistence_harmonic(tpms) if filters is not None: tpms = filters(tpms) two_points = two_point_factory.from_measurement(tpms) assert sacc_data.covariance is not None likelihood = ConstGaussian.create_ready(two_points, sacc_data.covariance.dense) return likelihood def _build_two_point_likelihood_real( sacc_data: sacc.Sacc, two_point_factory: TwoPointFactory, filters: TwoPointBinFilterCollection | None = None, ): """Build a likelihood object for two-point statistics in real space. This function creates a likelihood object for two-point statistics in real space using a SACC file and a set of statistic factories. The user must provide the SACC file and specify which statistic factories to use. The likelihood object is created by combining the SACC file with the specified statistic factories. :param sacc_data: The SACC file containing the data. :param wl_factory: The weak lensing statistic factory. :param nc_factory: The number counts statistic factory. :return: A likelihood object for two-point statistics in real space. """ tpms = extract_all_real_data(sacc_data) if len(tpms) == 0: raise ValueError( "No two-point measurements in real space found in the SACC file." ) check_two_point_consistence_real(tpms) if filters is not None: tpms = filters(tpms) two_points = two_point_factory.from_measurement(tpms) assert sacc_data.covariance is not None likelihood = ConstGaussian.create_ready(two_points, sacc_data.covariance.dense) return likelihood