Source code for pyirf.interpolation.moment_morph_interpolator

import numpy as np
from pyirf.binning import bin_center, calculate_bin_indices
from scipy.spatial import Delaunay

from .base_interpolators import DiscretePDFInterpolator, PDFNormalization
from .utils import get_bin_width

__all__ = [
    "MomentMorphInterpolator",
]


def _estimate_mean_std(bin_edges, binned_pdf, normalization):
    """
    Function to roughly estimate mean and standard deviation from a histogram.

    Parameters
    ----------
    bin_edges: np.ndarray, shape=(n_bins+1)
        Array of common bin-edges for binned_pdf
    binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
        PDF values from which to compute mean and std
    normalization : PDFNormalization
        How the PDF is normalized

    Returns
    -------
    mean: np.ndarray, shape=(n_points, ...)
        Estimated mean for each input template
    std: np.ndarray, shape=(n_points, ...)
        Estimated standard deviation for each input template. Set to width/2 if only one bin in
        the input template is =/= 0
    """
    # Create an 2darray where the 1darray mids is repeated n_template times
    mids = np.broadcast_to(bin_center(bin_edges), binned_pdf.shape)

    width = get_bin_width(bin_edges, normalization)

    # integrate pdf to get probability in each bin
    probability = binned_pdf * width
    # Weighted averages to compute mean and std
    mean = np.average(mids, weights=probability, axis=-1)
    std = np.sqrt(
        np.average((mids - mean[..., np.newaxis]) ** 2, weights=probability, axis=-1)
    )

    # Set std to 0.5*width for all those templates that have only one bin =/= 0. In those
    # cases mids-mean = 0 and therefore std = 0. Uses the width of the one bin with
    # binned_pdf!=0
    mask = std == 0
    if np.any(mask):
        width = np.diff(bin_edges)
        # std of a uniform distribution inside the bin
        uniform_std = np.broadcast_to(np.sqrt(1 / 12) * width, binned_pdf[mask].shape)
        std[mask] = uniform_std[binned_pdf[mask, :] != 0]

    return mean, std


def _lookup(bin_edges, binned_pdf, x):
    """
    Function to return the bin-height at a desired point.

    Parameters
    ----------
    bin_edges: np.ndarray, shape=(n_bins+1)
        Array of common bin-edges for binned_pdf
    binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
        Array of bin-entries, actual
    x: numpy.ndarray, shape=(n_points, ..., n_bins)
        Array of n_bins points for each input template, where the histogram-value (bin-height) should be found

    Returns
    -------
    y: numpy.ndarray, shape=(n_points, ..., n_bins)
        Array of the bin-heights at the n_bins points x, set to 0 at each point outside the histogram

    """
    # Find the bin where each point x is located in
    binnr, valid = calculate_bin_indices(x, bin_edges)

    # Set under/overflow-bins (invalid bins) to 0 to avoid errors below
    binnr[~valid] = 0

    # Loop over every combination of flattend input histograms and flattend binning
    lu = np.array(
        [
            cont[binnr_row]
            for cont, binnr_row in zip(
                binned_pdf.reshape(-1, binned_pdf.shape[-1]),
                binnr.reshape(-1, binnr.shape[-1]),
            )
        ]
    ).reshape(binned_pdf.shape)

    # Set all invalid bins to 0
    lu[~valid] = 0

    # Set under-/ overflowbins to 0, reshape to original shape
    return lu


def linesegment_1D_interpolation_coefficients(grid_points, target_point):
    """
    Compute 1D interpolation coefficients for moment morph interpolation,
    as in eq. (6) of [1]

    Parameters
    ----------
    grid_points: np.ndarray, shape=(2, 1)
        Points spanning a triangle in which
    target_point: numpy.ndarray, shape=(1, 1)
        Value at which the histogram should be interpolated

    Returns
    -------
    coefficients: numpy.ndarray, shape=(2,)
        Interpolation coefficients for all three interpolation simplex vertices
        to interpolate to the target_point

    References
    ----------
    .. [1] M. Baak, S. Gadatsch, R. Harrington and W. Verkerke (2015). Interpolation between
           multi-dimensional histograms using a new non-linear moment morphing method
           Nucl. Instrum. Methods Phys. Res. A 771, 39-48. https://doi.org/10.1016/j.nima.2014.10.033
    """
    # Set zeroth grid point as reference value
    m0 = grid_points[0, :]

    # Compute matrix M as in eq. (2) of [1]
    j = np.arange(0, grid_points.shape[0], 1)
    m_ij = (grid_points - m0) ** j

    # Compute coefficients, eq. (6) from [1]
    return np.einsum("...j, ji -> ...i", (target_point - m0) ** j, np.linalg.inv(m_ij))


def barycentric_2D_interpolation_coefficients(grid_points, target_point):
    """
    Compute barycentric 2D interpolation coefficients for triangular
    interpolation, see e.g. [1]

    Parameters
    ----------
    grid_points: np.ndarray, shape=(3, 2)
        Points spanning a triangle in which
    target_point: np.ndarray, shape=(1, 2)
        Value at which barycentric interpolation is needed

    Returns
    -------
    coefficients: numpy.ndarray, shape=(3,)
        Interpolation coefficients for all three interpolation simplex vertices
        to interpolate to the target_point

    References
    ----------
    .. [1] https://codeplea.com/triangular-interpolation
    """
    # Compute distance vectors between the grid points
    d13 = grid_points[0, :] - grid_points[2, :]
    d23 = grid_points[1, :] - grid_points[2, :]

    # Compute distance vector between target and third grid point
    dp3 = target_point.squeeze() - grid_points[2, :]

    # Compute first and second weight
    w1 = ((d23[1] * dp3[0]) + (-d23[0] * dp3[1])) / (
        (d23[1] * d13[0]) + (-d23[0] * d13[1])
    )
    w2 = ((-d13[1] * dp3[0]) + (d13[0] * dp3[1])) / (
        (d23[1] * d13[0]) + (-d23[0] * d13[1])
    )

    # Use w1+w2+w3 = 1 for third weight
    w3 = 1 - w1 - w2

    coefficients = np.array([w1, w2, w3])

    return coefficients


def moment_morph_estimation(bin_edges, binned_pdf, coefficients, normalization):
    """
    Function that wraps up the moment morph procedure [1] adopted for histograms.

    Parameters
    ----------
    bin_edges: np.ndarray, shape=(n_bins+1)
        Array of common bin-edges for binned_pdf
    binned_pdf: np.ndarray, shape=(n_points, ..., n_bins)
        Array of bin-entries, actual
    coefficients: np.ndarray, shape=(n_points)
        Estimation coefficients for each entry in binned_pdf
    normalization : PDFNormalization
        How the PDF is normalized

    Returns
    -------
    f_new: numpy.ndarray, shape=(1, n_bins)
        Interpolated histogram

    References
    ----------
    .. [1] M. Baak, S. Gadatsch, R. Harrington and W. Verkerke (2015). Interpolation between
           multi-dimensional histograms using a new non-linear moment morphing method
           Nucl. Instrum. Methods Phys. Res. A 771, 39-48. https://doi.org/10.1016/j.nima.2014.10.033
    """
    bin_mids = bin_center(bin_edges)

    # Catch all those templates, where at least one template histogram is all zeros.
    zero_templates = ~np.all(~np.isclose(np.sum(binned_pdf, axis=-1), 0), 0)

    # Manipulate those templates so that computations pass without error
    binned_pdf[:, zero_templates] = np.full(len(bin_mids), 1 / len(bin_mids))

    # Estimate mean and std for each input template histogram. First adaption needed to extend
    # the moment morph procedure to histograms
    mus, sigs = _estimate_mean_std(
        bin_edges=bin_edges, binned_pdf=binned_pdf, normalization=normalization
    )
    coefficients = coefficients.reshape(
        binned_pdf.shape[0], *np.ones(mus.ndim - 1, "int")
    )

    # Transform mean and std as in eq. (11) and (12) in [1]
    # cs = np.broadcast_to(cs, mus.shape)
    mu_prime = np.sum(coefficients * mus, axis=0)
    sig_prime = np.sum(coefficients * sigs, axis=0)

    # Compute slope and offset as in eq. (14) and (15) in [1]
    aij = sigs / sig_prime
    bij = mus - mu_prime * aij

    # Transformation as in eq. (13) in [1]
    mids = np.broadcast_to(bin_mids, binned_pdf.shape)
    transf_mids = aij[..., np.newaxis] * mids + bij[..., np.newaxis]

    # Compute the morphed historgram according to eq. (18) in [1]. The function "lookup" "resamples"
    # the histogram at the transformed bin-mids by using the templates historgam value at the transformed
    # bin-mid as new value for a whole transformed bin. Second adaption needed to extend
    # the moment morph procedure to histograms, adaptes the behaviour of eq. (16)

    transf_hist = _lookup(bin_edges=bin_edges, binned_pdf=binned_pdf, x=transf_mids)

    f_new = np.sum(
        np.expand_dims(coefficients, -1) * transf_hist * np.expand_dims(aij, -1), axis=0
    )

    # Reset interpolation resolts for those templates with partially zero entries from above to 0
    f_new[zero_templates] = np.zeros(len(bin_mids))

    # Re-Normalize, needed, as the estimation of the std used above is not exact but the result is scaled with
    # the estimated std
    bin_widths = get_bin_width(bin_edges, normalization)
    norm = np.expand_dims(np.sum(f_new * bin_widths, axis=-1), -1)

    return np.divide(f_new, norm, out=np.zeros_like(f_new), where=norm != 0).reshape(
        1, *binned_pdf.shape[1:]
    )


[docs] class MomentMorphInterpolator(DiscretePDFInterpolator): """Interpolator class providing Moment Morphing to interpolate discretized PDFs.""" def __init__( self, grid_points, bin_edges, binned_pdf, normalization=PDFNormalization.AREA ): """ Interpolator class using moment morphing. Parameters ---------- grid_points: np.ndarray, shape=(n_points, n_dims) Grid points at which interpolation templates exist. May be one ot two dimensional. bin_edges: np.ndarray, shape=(n_bins+1) Edges of the data binning binned_pdf: np.ndarray, shape=(n_points, ..., n_bins) Content of each bin in bin_edges for each point in grid_points. First dimesion has to correspond to number of grid_points. Interpolation dimension, meaning the the quantity that should be interpolated (e.g. the Migra axis for EDisp) has to be at axis specified by axis-keyword as well as having entries corresponding to the number of bins given through bin_edges keyword. normalization : PDFNormalization How the PDF is normalized Note ---- Also calls pyirf.interpolation.DiscretePDFInterpolator.__init__. """ super().__init__(grid_points, bin_edges, binned_pdf, normalization) if self.grid_dim == 2: self.triangulation = Delaunay(self.grid_points) elif self.grid_dim > 2: raise NotImplementedError( "Interpolation in more then two dimension not impemented." ) def _interpolate1D(self, target_point): """ Function to find target inside 1D self.grid_points and interpolate on this subset. """ target_bin = np.digitize(target_point.squeeze(), self.grid_points.squeeze()) segment_inds = np.array([target_bin - 1, target_bin], "int") coefficients = linesegment_1D_interpolation_coefficients( grid_points=self.grid_points[segment_inds], target_point=target_point, ) return moment_morph_estimation( bin_edges=self.bin_edges, binned_pdf=self.binned_pdf[segment_inds], coefficients=coefficients, normalization=self.normalization, ) def _interpolate2D(self, target_point): """ Function to find target inside 2D self.grid_points and interpolate on this subset. """ simplex_inds = self.triangulation.simplices[ self.triangulation.find_simplex(target_point) ].squeeze() coefficients = barycentric_2D_interpolation_coefficients( grid_points=self.grid_points[simplex_inds], target_point=target_point, ) return moment_morph_estimation( bin_edges=self.bin_edges, binned_pdf=self.binned_pdf[simplex_inds], coefficients=coefficients, normalization=self.normalization, )
[docs] def interpolate(self, target_point): """ Takes a grid of binned pdfs for a bunch of different parameters and interpolates it to given value of those parameters. This function calls implementations of the moment morphing interpolation pocedure introduced in [1]. Parameters ---------- target_point: numpy.ndarray, shape=(1, n_dims) Value for which the interpolation is performed (target point) Returns ------- f_new: numpy.ndarray, shape=(1,...,n_bins) Interpolated and binned pdf References ---------- .. [1] M. Baak, S. Gadatsch, R. Harrington and W. Verkerke (2015). Interpolation between multi-dimensional histograms using a new non-linear moment morphing method Nucl. Instrum. Methods Phys. Res. A 771, 39-48. https://doi.org/10.1016/j.nima.2014.10.033 """ if self.grid_dim == 1: interpolant = self._interpolate1D(target_point) elif self.grid_dim == 2: interpolant = self._interpolate2D(target_point) return interpolant