import numpy as np
from astropy.table import QTable
import astropy.units as u
from tqdm import tqdm
import operator
from .cuts import evaluate_binned_cut, calculate_percentile_cut
from .sensitivity import calculate_sensitivity, estimate_background
from .binning import create_histogram_table, bin_center
__all__ = [
"optimize_gh_cut",
]
[docs]
def optimize_gh_cut(
signal,
background,
reco_energy_bins,
gh_cut_efficiencies,
theta_cuts,
op=operator.ge,
fov_offset_min=0 * u.deg,
fov_offset_max=1 * u.deg,
alpha=1.0,
progress=True,
**kwargs
):
"""
Optimize the gh-score cut in every energy bin of reconstructed energy
for best sensitivity.
This procedure is EventDisplay-like, since it only applies a
pre-computed theta cut and then optimizes only the gamma/hadron separation
cut.
Parameters
----------
signal: astropy.table.QTable
event list of simulated signal events.
Required columns are `theta`, `reco_energy`, 'weight', `gh_score`
No directional (theta) or gamma/hadron cut should already be applied.
background: astropy.table.QTable
event list of simulated background events.
Required columns are `reco_source_fov_offset`, `reco_energy`,
'weight', `gh_score`.
No directional (theta) or gamma/hadron cut should already be applied.
reco_energy_bins: astropy.units.Quantity[energy]
Bins in reconstructed energy to use for sensitivity computation
gh_cut_efficiencies: np.ndarray[float, ndim=1]
The cut efficiencies to scan for best sensitivity.
theta_cuts: astropy.table.QTable
cut definition of the energy dependent theta cut,
e.g. as created by ``calculate_percentile_cut``
op: comparison function with signature f(a, b) -> bool
The comparison function to use for the gamma hadron score.
Returning true means an event passes the cut, so is not discarded.
E.g. for gammaness-like score, use `operator.ge` (>=) and for a
hadroness-like score use `operator.le` (<=).
fov_offset_min: astropy.units.Quantity[angle]
Minimum distance from the fov center for background events to be taken into account
fov_offset_max: astropy.units.Quantity[angle]
Maximum distance from the fov center for background events to be taken into account
alpha: float
Size ratio of off region / on region. Will be used to
scale the background rate.
progress: bool
If True, show a progress bar during cut optimization
**kwargs are passed to ``calculate_sensitivity``
"""
# we apply each cut for all reco_energy_bins globally, calculate the
# sensitivity and then lookup the best sensitivity for each
# bin independently
signal_selected_theta = evaluate_binned_cut(
signal['theta'], signal['reco_energy'], theta_cuts,
op=operator.le,
)
sensitivities = []
gh_cuts = []
for efficiency in tqdm(gh_cut_efficiencies, disable=not progress):
# calculate necessary percentile needed for
# ``calculate_percentile_cut`` with the correct efficiency.
# Depends on the operator, since we need to invert the
# efficiency if we compare using >=, since percentile is
# defines as <=.
if op(-1, 1): # if operator behaves like "<=", "<" etc:
percentile = 100 * efficiency
fill_value = signal['gh_score'].min()
else: # operator behaves like ">=", ">"
percentile = 100 * (1 - efficiency)
fill_value = signal['gh_score'].max()
gh_cut = calculate_percentile_cut(
signal['gh_score'], signal['reco_energy'],
bins=reco_energy_bins,
fill_value=fill_value, percentile=percentile,
)
gh_cuts.append(gh_cut)
# apply the current cut
signal_selected = evaluate_binned_cut(
signal["gh_score"], signal["reco_energy"], gh_cut, op,
) & signal_selected_theta
background_selected = evaluate_binned_cut(
background["gh_score"], background["reco_energy"], gh_cut, op,
)
# create the histograms
signal_hist = create_histogram_table(
signal[signal_selected], reco_energy_bins, "reco_energy"
)
background_hist = estimate_background(
events=background[background_selected],
reco_energy_bins=reco_energy_bins,
theta_cuts=theta_cuts,
alpha=alpha,
fov_offset_min=fov_offset_min,
fov_offset_max=fov_offset_max,
)
sensitivity = calculate_sensitivity(
signal_hist, background_hist, alpha=alpha,
**kwargs,
)
sensitivities.append(sensitivity)
best_cut_table = QTable()
best_cut_table["low"] = reco_energy_bins[0:-1]
best_cut_table["center"] = bin_center(reco_energy_bins)
best_cut_table["high"] = reco_energy_bins[1:]
best_cut_table["cut"] = np.nan
best_sensitivity = sensitivities[0].copy()
for bin_id in range(len(reco_energy_bins) - 1):
sensitivities_bin = [s["relative_sensitivity"][bin_id] for s in sensitivities]
if not np.all(np.isnan(sensitivities_bin)):
# nanargmin won't return the index of nan entries
best = np.nanargmin(sensitivities_bin)
else:
# if all are invalid, just use the first one
best = 0
best_sensitivity[bin_id] = sensitivities[best][bin_id]
best_cut_table["cut"][bin_id] = gh_cuts[best]["cut"][bin_id]
return best_sensitivity, best_cut_table