Source code for pyirf.cut_optimization

import numpy as np
from astropy.table import QTable
import astropy.units as u
from tqdm import tqdm
import operator

from .cuts import evaluate_binned_cut, calculate_percentile_cut
from .sensitivity import calculate_sensitivity, estimate_background
from .binning import create_histogram_table, bin_center


__all__ = [
    "optimize_gh_cut",
]



[docs]
def optimize_gh_cut(
    signal,
    background,
    reco_energy_bins,
    gh_cut_efficiencies,
    theta_cuts,
    op=operator.ge,
    fov_offset_min=0 * u.deg,
    fov_offset_max=1 * u.deg,
    alpha=1.0,
    progress=True,
    **kwargs
):
    """
    Optimize the gh-score cut in every energy bin of reconstructed energy
    for best sensitivity.

    This procedure is EventDisplay-like, since it only applies a
    pre-computed theta cut and then optimizes only the gamma/hadron separation
    cut.

    Parameters
    ----------
    signal: astropy.table.QTable
        event list of simulated signal events.
        Required columns are `theta`, `reco_energy`, 'weight', `gh_score`
        No directional (theta) or gamma/hadron cut should already be applied.
    background: astropy.table.QTable
        event list of simulated background events.
        Required columns are `reco_source_fov_offset`, `reco_energy`,
        'weight', `gh_score`.
        No directional (theta) or gamma/hadron cut should already be applied.
    reco_energy_bins: astropy.units.Quantity[energy]
        Bins in reconstructed energy to use for sensitivity computation
    gh_cut_efficiencies: np.ndarray[float, ndim=1]
        The cut efficiencies to scan for best sensitivity.
    theta_cuts: astropy.table.QTable
        cut definition of the energy dependent theta cut,
        e.g. as created by ``calculate_percentile_cut``
    op: comparison function with signature f(a, b) -> bool
        The comparison function to use for the gamma hadron score.
        Returning true means an event passes the cut, so is not discarded.
        E.g. for gammaness-like score, use `operator.ge` (>=) and for a
        hadroness-like score use `operator.le` (<=).
    fov_offset_min: astropy.units.Quantity[angle]
        Minimum distance from the fov center for background events to be taken into account
    fov_offset_max: astropy.units.Quantity[angle]
        Maximum distance from the fov center for background events to be taken into account
    alpha: float
        Size ratio of off region / on region. Will be used to
        scale the background rate.
    progress: bool
        If True, show a progress bar during cut optimization
    **kwargs are passed to ``calculate_sensitivity``
    """

    # we apply each cut for all reco_energy_bins globally, calculate the
    # sensitivity and then lookup the best sensitivity for each
    # bin independently

    signal_selected_theta = evaluate_binned_cut(
        signal['theta'], signal['reco_energy'], theta_cuts,
        op=operator.le,
    )

    sensitivities = []
    gh_cuts = []
    for efficiency in tqdm(gh_cut_efficiencies, disable=not progress):

        # calculate necessary percentile needed for
        # ``calculate_percentile_cut`` with the correct efficiency.
        # Depends on the operator, since we need to invert the
        # efficiency if we compare using >=, since percentile is
        # defines as <=.
        if op(-1, 1): # if operator behaves like "<=", "<" etc:
            percentile = 100 * efficiency
            fill_value = signal['gh_score'].min()
        else: # operator behaves like ">=", ">"
            percentile = 100 * (1 - efficiency)
            fill_value = signal['gh_score'].max()

        gh_cut = calculate_percentile_cut(
            signal['gh_score'], signal['reco_energy'],
            bins=reco_energy_bins,
            fill_value=fill_value, percentile=percentile,
        )
        gh_cuts.append(gh_cut)

        # apply the current cut
        signal_selected = evaluate_binned_cut(
            signal["gh_score"], signal["reco_energy"], gh_cut, op,
        ) & signal_selected_theta

        background_selected = evaluate_binned_cut(
            background["gh_score"], background["reco_energy"], gh_cut, op,
        )

        # create the histograms
        signal_hist = create_histogram_table(
            signal[signal_selected], reco_energy_bins, "reco_energy"
        )

        background_hist = estimate_background(
            events=background[background_selected],
            reco_energy_bins=reco_energy_bins,
            theta_cuts=theta_cuts,
            alpha=alpha,
            fov_offset_min=fov_offset_min,
            fov_offset_max=fov_offset_max,
        )

        sensitivity = calculate_sensitivity(
            signal_hist, background_hist, alpha=alpha,
            **kwargs,
        )
        sensitivities.append(sensitivity)

    best_cut_table = QTable()
    best_cut_table["low"] = reco_energy_bins[0:-1]
    best_cut_table["center"] = bin_center(reco_energy_bins)
    best_cut_table["high"] = reco_energy_bins[1:]
    best_cut_table["cut"] = np.nan

    best_sensitivity = sensitivities[0].copy()
    for bin_id in range(len(reco_energy_bins) - 1):
        sensitivities_bin = [s["relative_sensitivity"][bin_id] for s in sensitivities]

        if not np.all(np.isnan(sensitivities_bin)):
            # nanargmin won't return the index of nan entries
            best = np.nanargmin(sensitivities_bin)
        else:
            # if all are invalid, just use the first one
            best = 0

        best_sensitivity[bin_id] = sensitivities[best][bin_id]
        best_cut_table["cut"][bin_id] = gh_cuts[best]["cut"][bin_id]

    return best_sensitivity, best_cut_table