Source code for icefit.icelimit

# Standalone signal upper limit calculations (MC and asymptotic)
# 
# Run with: pytest icefit/icelimit.py -rP
#
# m.mieskolainen@imperial.ac.uk, 2023

import numpy as np
import scipy.optimize
from scipy import stats
from tqdm import tqdm
from scipy.stats import ncx2, norm
import matplotlib.pyplot as plt

#import mystic

from icefit import lognormal


def convolve_systematics(n, syst_error, num_toys, error_type='log-normal-mean'):
    """ Produce an array of values convolved with the chosen systematic uncertainty distribution
    
    Args:
        n:          expected count value
        syst_error: error size (e.g. standard deviation)
        num_toys:   number of MC samples
        error_type: type of distribution
    
    Returns:
        array of counts
    """
    if   error_type == 'flat':
        arr = np.random.uniform(low=n-syst_error, high=n+syst_error, size=num_toys)
    elif error_type == 'normal':
        arr = np.random.normal(loc=n, scale=syst_error, size=num_toys)
    elif error_type == 'log-normal-mean':
        arr = lognormal.rand_lognormal(m=n, v=syst_error**2, N=num_toys, inmode='mean')
    elif error_type == 'log-normal-median':
        arr = lognormal.rand_lognormal(m=n, v=syst_error**2, N=num_toys, inmode='median')
    else:
        raise Exception(f'convolve_systematics: unknown error_type "{error_type}"')
    
    return arr
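
# Illustrative usage sketch (commented out; values are arbitrary placeholders).
# Draw toy counts around an expectation of 100 with a 10 count systematic:
#
#   toys = convolve_systematics(n=100.0, syst_error=10.0, num_toys=int(1E5),
#                               error_type='log-normal-mean')
#   print(np.mean(toys), np.std(toys)) # mean ~ 100, std ~ 10 (up to MC noise)
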
def LL_splusb(n, s, b, EPS=1e-15):
    """ Single channel Poisson log-likelihood
    
    Args:
        n: counts
        s: expected signal count (mean)
        b: expected background count (mean)
    """
    a = s + b
    
    # The - np.log(np.math.factorial(n)) term is cancelled in the log-likelihood ratio
    return -a + np.log(np.clip(a, EPS, None))*n
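
# Illustrative sketch (commented out; placeholder counts) of why the log(n!)
# term can be dropped: it cancels in a log-likelihood ratio such as q_mu below.
#
#   n, b = 105, 100.0
#   shat = n - b # unconstrained maximum likelihood estimate of s
#   q    = -2 * (LL_splusb(n=n, s=10.0, b=b) - LL_splusb(n=n, s=shat, b=b))
#   # q >= 0, since s = shat maximizes the likelihood
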
def CL_single(mu, s_hypothesis, bg_expected, observed, num_toys_pdf=int(1E4),
    s_syst_error=None,  s_syst_type='flat',
    bg_syst_error=None, bg_syst_type='flat', use_CLs=True):
    """
    One tailed signal upper limits using "toy" Monte Carlo.
    A single channel counting experiment case with "quasi-Bayesian" style
    systematics convolved in via MC.
    
    Args:
        mu:               Signal strength parameter
        s_hypothesis:     Number of predicted signal events (with mu = 1)
        bg_expected:      Number of expected background events
        observed:         Number of observed events
        num_toys_pdf:     Number of MC samples to construct PDFs
        use_CLs:          Return "CLs" type limits
        (s)bg_syst_error: Systematic error on the estimated signal (background)
        (s)bg_syst_type:  Systematic uncertainty perturbation type (flat, normal, log-normal)
    
    Returns:
        p-value
    
    References:
        For the "CLs" approach motivation, see:
        A. Read, "Presentation of search results: the CLs technique"
        https://en.wikipedia.org/wiki/CLs_method_(particle_physics)
    """
    if observed < 0:
        observed = 0 # Protect against unphysical input (e.g. NLO MC)
    
    # Systematics convolved in here with MC
    if bg_syst_error is not None:
        b_arr = convolve_systematics(n=bg_expected, syst_error=bg_syst_error,
                                     error_type=bg_syst_type, num_toys=num_toys_pdf)
    else:
        b_arr = np.ones(num_toys_pdf) * bg_expected
    
    if s_syst_error is not None:
        # Sample at mu = 1 and scale by mu, so that both the mean and the
        # systematic error size scale consistently with the signal strength
        s_arr = mu * convolve_systematics(n=s_hypothesis, syst_error=s_syst_error,
                                          error_type=s_syst_type, num_toys=num_toys_pdf)
    else:
        s_arr = np.ones(num_toys_pdf) * (mu * s_hypothesis)
    # -----------------------------------------------
    
    s_arr[s_arr < 0] = 0 # Protection for Poisson sampling
    b_arr[b_arr < 0] = 0
    
    s_toys = np.random.poisson(s_arr) # Sample according to the Poisson mean given by each array element
    b_toys = np.random.poisson(b_arr)
    # -------------------------------
    
    # We use a pure MC PDF-based "ordering principle" here (no likelihood ratio involved)
    
    ## Prob{s+b}(X <= X_obs), Prob{b}(X <= X_obs)
    # [important to have less than AND equal to, e.g. for the zero count case]
    p_splusb = np.sum(s_toys + b_toys <= observed) / num_toys_pdf
    p_b      = np.sum(b_toys <= observed) / num_toys_pdf
    
    if use_CLs:
        return (p_splusb / p_b) if p_b > 0 else p_splusb
    else:
        return p_splusb
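
# Illustrative usage sketch (commented out; counts are arbitrary placeholders):
#
#   p = CL_single(mu=1.0, s_hypothesis=50.0, bg_expected=1000.0, observed=1000,
#                 num_toys_pdf=int(1E5), use_CLs=True)
#   # p < alpha would exclude mu = 1 at the 1 - alpha confidence level
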
def CL_single_asymptotic(mu, s_hypothesis, bg_expected, observed):
    """
    One tailed signal upper limits using the asymptotic (Gaussian) approximation.
    A single channel counting experiment case
    (without systematics --> those require a profile likelihood)
    
    Args:
        mu:           Signal strength parameter
        s_hypothesis: Number of predicted signal events (with mu = 1)
        bg_expected:  Number of expected background events
        observed:     Number of observed events
    
    Returns:
        p-value, test statistic value q_mu
    
    References:
        Cowan et al, "Asymptotic formulae for likelihood-based tests of new physics",
        https://arxiv.org/abs/1007.1727
        
        https://indico.cern.ch/event/126652/contributions/1343592/
        attachments/80222/115004/Frequentist_Limit_Recommendation.pdf
        
        https://arxiv.org/abs/2102.04275
    """
    if observed < 0:
        observed = 0 # Protect against unphysical input (e.g. NLO MC)
    
    # Maximum likelihood estimate of the signal count
    shat = observed - bg_expected
    
    # Likelihood-ratio test statistic (one-sided upper limit version)
    if shat < (mu * s_hypothesis):
        LL_mu_numer = LL_splusb(n=observed, s=(mu * s_hypothesis), b=bg_expected)
        LL_mu_denom = LL_splusb(n=observed, s=shat, b=bg_expected)
        
        q_mu = -2 * (LL_mu_numer - LL_mu_denom)
    else:
        q_mu = 0
    
    # Asymptotic p-value
    p_mu = 1 - norm.cdf(np.sqrt(q_mu))
    
    return p_mu, q_mu
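
# Illustrative usage sketch (commented out; same placeholder counts as above):
#
#   p_mu, q_mu = CL_single_asymptotic(mu=1.0, s_hypothesis=50.0,
#                                     bg_expected=1000.0, observed=1000)
#   # p_mu is one-sided; q_mu = 0 when the MLE shat >= mu * s_hypothesis
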
def asymptotic_uncertainty_CLs(mu_up, q, alpha, N):
    """
    Asymptotic (Asimov) +-N sigma values around the median expected limit.
    N = 0 gives the median. Type "CLs": keeps the minus sigma tail
    on the positive side of mu.
    
    See reference: "Frequentist_Limit_Recommendation.pdf"
    
    Args:
        mu_up: iteratively solved median expected upper limit value
        q:     test statistic value corresponding to mu_up
        alpha: confidence level at which mu_up was extracted
        N:     array of sigmas, e.g. [-2,-1,0,1,2]
    """
    sigma = np.sqrt(mu_up**2 / q)
    out   = sigma * (norm.ppf(1 - alpha * norm.cdf(N)) + N)
    #print(f'{mu_up} || {out[2]}')
    
    return out
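
# Illustrative consistency check (commented out). At the CLs median, the
# condition p_{s+b} / p_b = alpha with p_b = 1/2 gives
# sqrt(q) = norm.ppf(1 - alpha/2), so the N = 0 entry recovers mu_up:
#
#   q    = norm.ppf(1 - 0.05/2)**2 # ~ 3.84
#   band = asymptotic_uncertainty_CLs(mu_up=1.0, q=q, alpha=0.05,
#                                     N=np.array([-2,-1,0,1,2]))
#   # band[2] ~ 1.0 (median); band[0] and band[4] are the +-2 sigma edges
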
def CL_single_compute(s_hypothesis, bg_expected, observed=None,
    s_syst_error=None, bg_syst_error=None, method='toys',
    alpha=0.05, num_toys_pdf=int(1E4), num_toys_obs=int(1E3),
    mu_bounds=(1e-5, 1E4), mu_init=1.0, max_restart=1000, func_tol=1E-5,
    optimize_method='golden', maxiter=10000):
    """
    Main solver function for the upper limit computation
    on the signal strength parameter mu.
    
    Args:
        s_hypothesis:  expected signal count (with mu = 1)
        bg_expected:   expected background count
        observed:      observed event count, if None then observed = bg_expected
        method:        'toys' or 'asymptotic'
        alpha:         confidence level for the upper limit (e.g. 0.05 ~ CL95 upper limits)
        num_toys_pdf:  number of MC samples to construct the PDFs (toys method)
        num_toys_obs:  number of MC samples of the expected observation (toys method)
        s_syst_error:  systematic error size on signal
        bg_syst_error: systematic error size on background
    
    Optimization args:
        mu_bounds:       bounds on the mu-value to obey
        mu_init:         initial value (not used by the scalar optimizer)
        max_restart:     maximum number of restarts
        func_tol:        quadratic loss tolerance
        optimize_method: method to use for the optimization, 'brent' or 'golden'
        maxiter:         maximum number of iterations
    
    Returns:
        upper limits [-2 sigma, -1 sigma, median, +1 sigma, +2 sigma] at the level alpha
    """
    
    # To find the expected limit, we insert the Asimov dataset, which is
    # the expected background (with no fluctuations) and signal = 0.
    if observed is None:
        X = bg_expected # "Observed"
    else:
        X = observed
    
    ## Cost function for the limit inversion
    # (i.e. finding the value of mu which corresponds to the level alpha)
    def func(mu):
        # Some optimization routines pass the parameter wrapped in a list
        mu_value = mu[0] if type(mu) is list else mu
        
        if   method == 'toys':
            p = CL_single(mu=mu_value, s_hypothesis=s_hypothesis, observed=X_input,
                bg_expected=bg_expected, s_syst_error=s_syst_error,
                bg_syst_error=bg_syst_error, num_toys_pdf=num_toys_pdf)
        elif method == 'asymptotic':
            p, q_mu = CL_single_asymptotic(mu=mu_value, s_hypothesis=s_hypothesis,
                bg_expected=bg_expected, observed=X_input)
        else:
            raise Exception(__name__ + '.CL_single_compute: Unknown computation method')
        
        return (p - alpha)**2 # quadratic loss
    
    optimizer_param = {'method': optimize_method, 'tol': 1E-8, 'options': {'maxiter': maxiter}}
    
    ## Compute the limit inversion
    repeat = num_toys_obs if (method == 'toys' and observed is None) else 1
    mu_up  = np.nan * np.ones(repeat)
    
    i      = 0
    trials = 0
    while i < repeat:
        
        # Generate toy data for the expected limits
        X_input = np.random.poisson(X) if (method == 'toys' and observed is None) else X
        
        # Minimize
        out = scipy.optimize.minimize_scalar(fun=func, **optimizer_param)
        
        if out['x'] > mu_bounds[0] and out['x'] < mu_bounds[1] and out['fun'] < func_tol:
            mu_up[i] = out['x']
            i += 1
        else:
            trials += 1
            if trials > max_restart:
                mu_up[i] = np.nan
                i += 1
                trials = 0
                print(__name__ + '.CL_single_compute: Minimizer problem; set NaN (check parameter and input feasibility)')
    
    # Compute final results for mu_up (at the level alpha) with +-1(2) sigma uncertainties
    if observed is None:
        if method == 'asymptotic':
            _, q   = CL_single_asymptotic(mu=mu_up[0], s_hypothesis=s_hypothesis,
                bg_expected=bg_expected, observed=X)
            output = asymptotic_uncertainty_CLs(mu_up=mu_up[0], q=q, alpha=alpha, N=np.array([-2,-1,0,1,2]))
        else:
            output = np.array([np.percentile(mu_up, quant) for quant in [2.5, 16, 50, 84, 97.5]])
    else:
        output = mu_up
    
    return output
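
# Illustrative usage sketch (commented out; counts match the unit test below):
#
#   limits = CL_single_compute(s_hypothesis=500.0, bg_expected=1500.0,
#                              observed=None, method='asymptotic')
#   # limits = [-2 sigma, -1 sigma, median, +1 sigma, +2 sigma]
#   # expected upper limits on mu at the level alpha = 0.05
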
def test_limits_unit_test(EPS=0.05):
    """
    Simple unit tests
    """
    
    # Test counts
    s_hypothesis = 500.0
    bg_expected  = 1500.0
    observed     = bg_expected + s_hypothesis + 10
    
    expected_results = {'asymptotic': None, 'toys': None}
    observed_results = {'asymptotic': None, 'toys': None}
    
    for method in expected_results.keys():
        
        print(f'\n[Method: {method}]')
        
        expected_results[method] = CL_single_compute(method=method, observed=None,
            s_hypothesis=s_hypothesis, bg_expected=bg_expected)
        print('Expected upper 95CL limit on xs/xs_{benchmark} (H0: background only scenario):')
        print(np.round(expected_results[method], 4))
        print('')
        
        observed_results[method] = CL_single_compute(method=method, observed=observed,
            s_hypothesis=s_hypothesis, bg_expected=bg_expected)
        print('Simulated (observed) 95CL upper limit on xs/xs_{benchmark} (H1: nominal signal (mu=1) + background scenario):')
        print(np.round(observed_results[method], 4))
    
    assert np.linalg.norm(expected_results['asymptotic'] - expected_results['toys']) < EPS
    assert np.linalg.norm(observed_results['asymptotic'] - observed_results['toys']) < EPS
#else:
#    raise Exception('Minimization failed')

"""
rvalues = np.logspace(np.log10(r_bounds[0]), np.log10(r_bounds[1]), 10000)

import matplotlib.pyplot as plt
cc = np.zeros(len(rvalues))
for kk in range(len(rvalues)):
    cc[kk] = CL_single_asymptotic(s_hypothesis=rvalues[kk] * s_hypothesis,
                                  bg_expected=bg_expected, observed=X)

fig, ax = plt.subplots()
plt.plot(rvalues, cc)
plt.xscale('log')
plt.xlabel('$r$'); plt.ylabel('CL'); plt.title(f'$\\alpha = {alpha[i]:0.3f}$')
plt.savefig(f'tmp/method={method}_alpha={alpha[i]}_trials={trials}.pdf')
plt.close()

# --------------
# Slower but more global search via mystic solver

monitor = mystic.monitors.VerboseMonitor(0)

if   mystic_solver == 'diffev2':
    solver = mystic.solvers.diffev2
elif mystic_solver == 'fmin':
    solver = mystic.solvers.fmin
elif mystic_solver == 'fmin_powell':
    solver = mystic.solvers.fmin_powell

result = solver(func, x0=[start], bounds=[r_bounds], penalty=None, ftol=func_tol, \
    disp=False, full_output=True, itermon=monitor, maxiter=mystic_maxiter)

mystic_maxiter += 50
r_up.append( result[0] )
fun = result[1]
"""