# iceboost == xgboost + torch autograd based extensions
#
# m.mieskolainen@imperial.ac.uk, 2024
import matplotlib.pyplot as plt
import numpy as np
import torch
import os
import xgboost
import copy
from tqdm import tqdm
import pickle
import gc
# icenet
from icenet.tools import aux, stx, plots, io
from icenet.deep import autogradxgb, optimize, losstools, tempscale, deeptools
from icefit import mine, cortools
# ------------------------------------------
from icenet import print
# ------------------------------------------
import ray
from ray import tune
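# Weighted squared hinge loss: targets in {0,1} are mapped to {-1,+1}; the result is
# scaled back to the number of events, following the same xgboost convention as the
# other custom losses below.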
def _hinge_loss(preds: torch.Tensor, targets: torch.Tensor, weights: torch.Tensor=None):
device = preds.device
if weights is None:
w = torch.ones(len(preds))
w = w / torch.sum(w)
else:
w = weights / torch.sum(weights)
# Set computing device
targets = targets.to(device)
w = w.to(device)
targets = 2 * targets - 1
loss = w * torch.max(torch.zeros_like(preds), 1 - preds * targets) ** 2
return loss.sum() * len(preds)
def BCE_loss_with_logits(input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor = None, epsilon=None):
"""
Numerically stable BCE loss with logits
https://medium.com/@sahilcarterr/why-nn-bcewithlogitsloss-numerically-stable-6a04f3052967
"""
if epsilon is not None: # Label smoothing
new_target = target * (1 - epsilon) + 0.5 * epsilon
else:
new_target = target
max_val = (-input).clamp_min(0)
loss = (1 - new_target).mul(input).add(max_val).add((-max_val).exp().add((-input - max_val).exp()).log())
if weights is not None:
loss.mul_(weights)
return loss
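# Optional sanity check (an illustrative sketch, not used by the training code; the function
# name is ours): for epsilon=None the stable expression above should agree elementwise with
# torch's own implementation.
def _example_check_bce_with_logits() -> bool:
    """Illustrative comparison of BCE_loss_with_logits against torch.nn.functional."""
    logits  = torch.randn(8)
    targets = torch.randint(0, 2, (8,)).float()
    weights = torch.rand(8)
    ours = BCE_loss_with_logits(input=logits, target=targets, weights=weights)
    ref  = torch.nn.functional.binary_cross_entropy_with_logits(
        logits, targets, weight=weights, reduction='none')
    return torch.allclose(ours, ref, atol=1e-6)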
def _sliced_wasserstein(preds: torch.Tensor, targets: torch.Tensor, weights: torch.Tensor=None, EPS=1E-12):
"""
Custom sliced Wasserstein loss
Negative weights are supported via the module-level 'out_weights' variable (update it before each train / eval call)
"""
global loss_mode
global x
global SWD_param
global track_loss
global out_weights
track_loss = {}
device = preds.device
if out_weights is not None: # Feed in weights outside
w = torch.from_numpy(out_weights)
w = w / torch.sum(w)
elif weights is None:
w = torch.ones(len(preds))
w = w / torch.sum(w)
else:
w = weights / torch.sum(weights)
# Set computing device
targets = targets.type(torch.int32).to(device)
w = w.to(device)
loss_str = ''
# --------------------------------------------------------------------
# Sliced Wasserstein U->V loss
x = torch.tensor(x, dtype=preds.dtype).to(device)
loss = losstools.SWD_reweight_loss(
logits=preds, x=x, y=targets, weights=w,
p=SWD_param['p'], num_slices=SWD_param['num_slices'], mode=SWD_param['mode'])
txt = f'SWD'
track_loss[txt] = loss.item()
loss_str += f'{txt} = {loss.item():0.5f} | '
# --------------------------------------------------------------------
# Total loss
total_loss = loss
track_loss['sum'] = total_loss.item()
# Print
loss_str = f'Loss[{loss_mode}]: sum: {total_loss.item():0.5f} | ' + loss_str
print(loss_str, 'yellow')
# --------------------------------------------------------------------
# Finally, scale by the total number of events (to conform with the xgboost internal convention)
return total_loss * len(preds)
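# Concept sketch (illustrative only; this is not the losstools.SWD_reweight_loss call used above):
# for a single random projection, the sliced Wasserstein distance reduces to comparing the sorted
# 1D samples (empirical quantiles). Equal sample sizes and uniform weights are assumed here, and
# 'p' plays the same role as SWD_param['p'].
def _example_wasserstein_1d(u: torch.Tensor, v: torch.Tensor, p: int = 1) -> torch.Tensor:
    """Illustrative 1D Wasserstein-p distance between two equally sized, unweighted samples."""
    u_sorted, _ = torch.sort(u.flatten())
    v_sorted, _ = torch.sort(v.flatten())
    return torch.mean(torch.abs(u_sorted - v_sorted) ** p) ** (1.0 / p)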
def _binary_cross_entropy(preds: torch.Tensor, targets: torch.Tensor, weights: torch.Tensor=None, EPS=1E-12):
"""
Custom binary cross entropy loss with Sliced Wasserstein,
domain adaptation (DA) and mutual information (MI) regularization
Negative weights are supported via the module-level 'out_weights' variable (update it before each train / eval call)
"""
global loss_mode
global x
global MI_x
global BCE_param
global SWD_param
global MI_param
global track_loss
global out_weights
device = preds.device
if out_weights is not None: # Feed in weights outside
w = torch.from_numpy(out_weights)
w = w / torch.sum(w)
elif weights is None:
w = torch.ones(len(preds))
w = w / torch.sum(w)
else:
w = weights / torch.sum(weights)
# Set computing device
targets = targets.type(torch.int32).to(device)
w = w.to(device)
track_loss = {}
loss_str = ''
# --------------------------------------------------------------------
## Binary Cross Entropy terms
BCE_loss = torch.tensor(0.0).to(device)
for key in BCE_param.keys():
w_this = w.clone()
param = BCE_param[key]
# Label Smoothing
if 'label_eps' in param:
epsilon = param['label_eps']
else:
epsilon = None
# Set labels
t0 = (targets == param['classes'][0])
t1 = (targets == param['classes'][1])
# Check that there are some events
if torch.sum(t0) < 1:
print(f"BCE[{key}] No events from class [{param['classes'][0]}] - skip loss term")
continue
if torch.sum(t1) < 1:
print(f"BCE[{key}] No events from class [{param['classes'][1]}] - skip loss term")
continue
targets_CE = targets.clone()
targets_CE[t0] = 0
targets_CE[t1] = 1
### Now loop over all filter categories
mask0 = param[f'evt_mask_{loss_mode}'][param['classes'][0]]
mask1 = param[f'evt_mask_{loss_mode}'][param['classes'][1]]
N_cat = mask0.shape[0]
loss = torch.tensor(0.0).to(device)
for m in range(N_cat):
# Pick the binary (0/1) index mask of this filter category
m0 = torch.tensor(mask0[m,:], dtype=torch.int32).to(device)
m1 = torch.tensor(mask1[m,:], dtype=torch.int32).to(device)
wfinal = torch.zeros_like(w).to(device) # ! important to init with zeros
# Event weights
wfinal[t0] = m0 * w_this[t0]; wfinal[t0] = wfinal[t0] / torch.sum(wfinal[t0])
wfinal[t1] = m1 * w_this[t1]; wfinal[t1] = wfinal[t1] / torch.sum(wfinal[t1])
wfinal = wfinal / torch.sum(wfinal)
# BCE
CE = BCE_loss_with_logits(input=preds, target=targets_CE, weights=wfinal, epsilon=epsilon)
loss = loss + CE
loss = param["beta"] * loss.sum()
BCE_loss = BCE_loss + loss
if 'label_eps' in param:
txt = f'BCE {key} [$\\beta$ = {param["beta"]}, $\\epsilon$ = {param["label_eps"]}]'
else:
txt = f'BCE {key} [$\\beta$ = {param["beta"]}]'
track_loss[txt] = loss.item()
loss_str += f'{txt} = {loss.item():0.5f} | '
# --------------------------------------------------------------------
# Temperature post-calibration [currently used only for diagnostics]
if loss_mode == 'eval':
ts = tempscale.LogitsWithTemperature(mode='binary', device=device)
ts.calibrate(logits=preds, labels=targets.to(torch.float32), weights=w)
# --------------------------------------------------------------------
# Sliced Wasserstein reweight U (y==0) -> V (y==1) transport
SWD_loss = torch.tensor(0.0).to(device)
if (SWD_param is not None) and (abs(SWD_param["beta"]) > 1E-15):
# Total maximum is limited, pick random subsample
if SWD_param['max_N'] is not None and targets.shape[0] > SWD_param['max_N']:
r = np.random.choice(targets.shape[0], size=SWD_param['max_N'], replace=False)
logits_ = preds[r]
x_ = x[r]
y_ = targets[r]
w_ = w[r]
else:
logits_ = preds
x_ = x
y_ = targets
w_ = w
# Pick used variables
x_ = torch.tensor(x_[:, SWD_param['x_dim_index']],
dtype=preds.dtype, device=preds.device) # Map to device
# Evaluate loss
loss = losstools.SWD_reweight_loss(
logits=logits_, x=x_, y=y_, weights=w_,
p=SWD_param['p'], num_slices=SWD_param['num_slices'],
norm_weights=SWD_param['norm_weights'], mode=SWD_param['mode'])
SWD_loss = SWD_param["beta"] * loss
txt = f'SWD [$\\beta$ = {SWD_param["beta"]}]'
track_loss[txt] = SWD_loss.item()
loss_str += f'{txt} = {SWD_loss.item():0.5f} | '
# --------------------------------------------------------------------
## MI Regularization
MI_loss = torch.tensor(0.0).to(device)
if MI_param is not None:
# Loop over chosen classes
k = 0
values = []
for c in MI_param['classes']:
# -----------
reg_param = copy.deepcopy(MI_param)
reg_param['ma_eT'] = reg_param['ma_eT'][k] # Pick the one
# -----------
mask = reg_param[f'evt_mask_{loss_mode}'][c]
# Pick class indices
cind = (targets != None) if c == None else (targets == c)
# Map predictions to [0,1]
Z = torch.clip(torch.sigmoid(preds[cind]).squeeze(), EPS, 1-EPS)
X = torch.Tensor(MI_x).to(device)[cind].squeeze()
W = w[cind]
# Total maximum is limited (for DCORR), pick random subsample
if reg_param['losstype'] == 'DCORR' and reg_param['max_N'] is not None and X.shape[0] > reg_param['max_N']:
r = np.random.choice(len(X), size=reg_param['max_N'], replace=False)
X = X[r]
Z = Z[r]
W = W[r]
mask = mask[:,r]
### Now loop over all filter categories
N_cat = mask.shape[0]
loss_this = torch.tensor(0.0).to(device)
value = torch.tensor(0.0).to(device)
total_ww = 0.0
for m in range(N_cat):
# Cast to a NumPy boolean array, otherwise torch tensor indexing does not accept the mask!
mm_ = np.array(mask[m,:], dtype=bool)
# Apply the score threshold (e.g. when only the high-score region is of interest)
# The first boost iterations might not yield any events passing this; the 'min_count' cut below handles that case
if reg_param['min_score'] is not None:
mm_ = mm_ & (Z.detach().cpu().numpy() > reg_param['min_score'])
# Minimum number of events per category cutoff
if reg_param['min_count'] is not None and np.sum(mm_) < reg_param['min_count']:
print(f"MI_reg: {np.sum(mm_)} < {reg_param['min_count']} = reg_param['min_count'] (class [{c}] | category [{m}]) -- skip", 'red')
continue
## Non-Linear Distance Correlation
if reg_param['losstype'] == 'DCORR':
value = value + cortools.distance_corr_torch(x=X[mm_], y=Z[mm_], weights=W[mm_])
## Linear Pearson Correlation (only for DEBUG)
elif reg_param['losstype'] == 'PEARSON':
if len(X.shape) > 1: # if multidim X
for j in range(X.shape[-1]): # dim-by-dim against Z (BDT output)
rho = cortools.corrcoeff_weighted_torch(x=X[mm_, j], y=Z[mm_], weights=W[mm_])
triag = torch.triu(rho, diagonal=1) # upper triangle without diagonal
L = torch.sum(torch.abs(triag))
value = value + L
else:
rho = cortools.corrcoeff_weighted_torch(x=X[mm_], y=Z[mm_], weights=W[mm_])
triag = torch.triu(rho, diagonal=1)
L = torch.sum(torch.abs(triag))
value = value + L
## Neural Mutual Information
else:
# We need .detach() here for Z!
model = mine.estimate(X=X[mm_], Z=Z[mm_].detach(), weights=W[mm_],
return_model_only=True, device=device, **reg_param)
# ------------------------------------------------------------
# Now (re)-apply the MI estimator to the sample
# No .detach() here, we need the gradients wrt Z!
value = mine.apply_mine_batched(X=X[mm_], Z=Z[mm_], weights=W[mm_], model=model,
losstype=reg_param['losstype'], batch_size=reg_param['eval_batch_size'])
# Significance N/sqrt(N) = sqrt(N) weights based on Poisson stats
if MI_param['poisson_weight']:
cat_ww = np.sqrt(np.sum(mm_))
else:
cat_ww = 1.0
if not torch.isfinite(value): # First boost iteration might yield bad values
value = torch.tensor(0.0).to(device)
# Save
loss_this = loss_this + MI_param['beta'][k] * value * cat_ww
total_ww += cat_ww
values.append(np.round(value.item(), 6))
# Finally add this to the total loss
if total_ww > 0:
MI_loss = MI_loss + loss_this / total_ww
k += 1
print(f'RAW {reg_param["losstype"]} = {values}', 'yellow')
txt = f'{reg_param["losstype"]} [$\\beta$ = {MI_param["beta"]}]'
track_loss[txt] = MI_loss.item()
loss_str += f'{txt} = {MI_loss.item():0.5f}'
# --------------------------------------------------------------------
# Total loss
total_loss = BCE_loss + SWD_loss + MI_loss
track_loss['sum'] = total_loss.item()
# Print
loss_str = f'Loss[{loss_mode}]: sum: {total_loss.item():0.5f} | ' + loss_str
print(loss_str, 'yellow')
# --------------------------------------------------------------------
# Finally, scale by the total number of events (to conform with the xgboost internal convention)
return total_loss * len(preds)
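# Background sketch on the xgboost custom-objective convention referred to above: xgboost expects
# an objective callable returning per-event gradient and hessian arrays. In this module,
# autogradxgb.XgboostObjective derives them from the scalar torch losses defined above (hence the
# final scaling by the number of events). The plain-numpy logistic example below only illustrates
# the interface; it is not part of iceboost and the function name is ours.
def _example_plain_xgb_objective(preds: np.ndarray, dtrain: 'xgboost.DMatrix'):
    """Illustrative hand-written logistic objective usable via xgboost.train(obj=...)."""
    y    = dtrain.get_label()             # binary 0/1 labels
    p    = 1.0 / (1.0 + np.exp(-preds))   # sigmoid of the raw margin
    grad = p - y                          # first derivative per event
    hess = p * (1.0 - p)                  # second derivative per event
    return grad, hess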
def create_filters(param, data_trn, data_val):
# Create filter masks
param['evt_mask_train'] = {}
param['evt_mask_eval'] = {}
for c in param['classes']: # per chosen class
for mode in ['train', 'eval']:
print(f'class[{c}] ({mode}):')
if mode == 'train':
data = data_trn
else:
data = data_val
# Pick class indices
cind = (data.y != None) if c == None else (data.y == c)
# Per filter category
if 'set_filter' in param:
mask, text, path = stx.filter_constructor(
filters=param['set_filter'],
X=data.x[cind,...],
ids=data_trn.ids,
y=data.y[cind])
# All inclusive
else:
mask = np.ones((1, len(data.x[cind,...])), dtype=bool)
text = ['inclusive']
stx.print_stats(mask=mask, text=text)
# Save the mask
param[f'evt_mask_{mode}'][c] = copy.deepcopy(mask)
return param
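# Shape convention sketch: each stored mask is a boolean array of shape
# (N_categories, N_events_of_that_class); the custom losses above loop over its rows.
# The inclusive fallback is therefore a single all-True row, e.g. for 5 hypothetical events:
#   mask = np.ones((1, 5), dtype=bool)   # 1 category x 5 events
#   mask.shape[0]                         # -> N_cat == 1 in the per-category loops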
def train_xgb(config={'params': {}}, data_trn=None, data_val=None, y_soft=None, args=None, param=None,
plot_importance=True, data_trn_MI=None, data_val_MI=None):
"""
Train XGBoost model
Args:
See other train_* under train.py
Returns:
trained model
"""
global x
global MI_x
global SWD_param
global BCE_param
global MI_param
global loss_mode
global track_loss
global out_weights
x = None
MI_x = None
SWD_param = None
BCE_param = None
MI_param = None
loss_mode = None
out_weights = None
loss_history_train = {}
loss_history_eval = {}
# --------------------------------------------------------------
# TensorboardX
if not args['__raytune_running__'] and param['tensorboard']:
from tensorboardX import SummaryWriter
writer = SummaryWriter(os.path.join(args['modeldir'], param['label']))
if 'SWD_param' in param:
SWD_param = param['SWD_param']
# Pick variables to use
pick_ind, pick_vars = aux.pick_index(all_ids=data_trn.ids, vars=SWD_param['var'])
print(f'SWD_param: Using variables {pick_vars} ({pick_ind})')
SWD_param['x_dim_index'] = pick_ind
if 'BCE_param' in param:
BCE_param = {}
for key in param['BCE_param'].keys():
print(f'Setting BCE event filters [{key}]', 'green')
BCE_param[key] = create_filters(param=param['BCE_param'][key], data_trn=data_trn, data_val=data_val)
if 'MI_param' in param:
print(f'Setting MI event filters', 'green')
MI_param = copy.deepcopy(param['MI_param']) #! important
MI_param = create_filters(param=MI_param, data_trn=data_trn, data_val=data_val)
# ---------------------------------------------------
if param['model_param']['device'] == 'auto':
param['model_param'].update({'device': 'cuda' if torch.cuda.is_available() else 'cpu'})
print(f'Training <{param["label"]}> classifier ...')
### ** Optimization hyperparameters [possibly from Raytune] **
param['model_param'] = aux.replace_param(default=param['model_param'], raytune=config['params'])
# Activate custom-loss mode
use_custom = True if 'custom' in param['model_param']['objective'] else False
### *********************************
# Normalize weights to sum to the number of events (xgboost library has no scale normalization)
w_trn = data_trn.w / np.sum(data_trn.w) * data_trn.w.shape[0]
w_val = data_val.w / np.sum(data_val.w) * data_val.w.shape[0]
# ---------------------------------------------------------
# Choose weight mode
if np.min(w_trn) < 0.0 or np.min(w_val) < 0.0:
print(f'Negative weights in the sample -- handled via custom loss', 'magenta')
out_weights_on = True
if not use_custom:
raise Exception(__name__ + f'.train_xgb: Need to use custom with negative weights, e.g. "custom:binary_cross_entropy". Change your parameters.')
else:
out_weights_on = False
# ---------------------------------------------------------
print(param)
trn_losses = []
val_losses = []
trn_aucs = []
val_aucs = []
# ---------------------------------------
# Update the parameters
model_param = copy.deepcopy(param['model_param'])
if 'multi' in model_param['objective']:
model_param.update({'num_class': len(args['primary_classes'])})
del model_param['num_boost_round']
# ---------------------------------------
# Boosting iterations
num_epochs = param['model_param']['num_boost_round']
# Prepare input
X_trn, ids_trn = aux.red(X=data_trn.x, ids=data_trn.ids, param=param, verbose=True) # variable reduction
X_val, ids_val = aux.red(X=data_val.x, ids=data_val.ids, param=param, verbose=False) # variable reduction
# Create input xgboost frames
dtrain = xgboost.DMatrix(data=X_trn, label = data_trn.y if y_soft is None else y_soft, weight = w_trn if not out_weights_on else None, feature_names=ids_trn)
deval = xgboost.DMatrix(data=X_val, label = data_val.y, weight = w_val if not out_weights_on else None, feature_names=ids_val)
# -------------------------------------------
# Special optimization parameters
noise_reg = None
if 'opt_param' in param:
if ('noise_reg' in param['opt_param']) and (abs(param['opt_param']['noise_reg']) > 1E-15):
X_trn_orig = copy.deepcopy(X_trn)
noise_reg = param['opt_param']['noise_reg']
# -------------------------------------------
# Custom loss object init
if use_custom:
## Custom loss string of type 'custom_loss:loss_name:hessian:hessian_mode(:parameter)'
strs = model_param['objective'].split(':')
if strs[1] == 'binary_cross_entropy':
loss_func = _binary_cross_entropy
elif strs[1] == 'sliced_wasserstein':
loss_func = _sliced_wasserstein
else:
raise Exception(__name__ + f'.train_xgb: Unknown custom loss {strs[1]} (check syntax)')
## Hessian treatment
# Default values
hessian_mode = 'constant'
hessian_const = 1.0
hessian_gamma = 0.9
hessian_slices = 10
# E.g. 'hessian:constant:1.0', 'hessian:iterative:0.9', 'hessian:hutchinson:10' or 'hessian:exact'
if 'hessian' in strs:
hessian_mode = strs[strs.index('hessian')+1]
# Pick additional parameters
try:
if hessian_mode == 'constant':
hessian_const = float(strs[strs.index('hessian')+2])
elif hessian_mode == 'iterative':
hessian_gamma = float(strs[strs.index('hessian')+2])
elif hessian_mode == 'hutchinson':
hessian_slices = float(strs[strs.index('hessian')+2])
except:
print('Using default Hessian estimator parameters')
autogradObj = autogradxgb.XgboostObjective(
loss_func = loss_func,
hessian_mode = hessian_mode,
hessian_const = hessian_const,
hessian_gamma = hessian_gamma,
hessian_slices = hessian_slices,
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu:0') # pick the device here; no 'device' variable is defined yet at this point
)
for epoch in range(0, num_epochs):
# ---------------------------------------
# "Scheduled noise regularization"
if noise_reg is not None:
sigma2 = noise_reg * deeptools.sigmoid_schedule(t=epoch, N_max=num_epochs)
X_trn = np.sqrt(max(0, 1-sigma2)) * X_trn_orig + np.sqrt(sigma2) * np.random.normal(size=X_trn.shape)
dtrain = xgboost.DMatrix(data=X_trn, label = data_trn.y if y_soft is None else y_soft, weight = w_trn if not out_weights_on else None, feature_names=ids_trn)
print(f'Noise regularization sigma2 = {sigma2:0.4f}')
# ---------------------------------------
## What to evaluate
if epoch == 0 or ((epoch+1) % param['savemode']) == 0 or args['__raytune_running__']:
evallist = [(dtrain, 'train'), (deval, 'eval')]
else:
evallist = [(dtrain, 'train')]
## Prepare parameters
results = dict()
a = {'params': copy.deepcopy(model_param),
'dtrain': dtrain,
'num_boost_round': 1,
'evals': evallist,
'evals_result': results,
'verbose_eval': False}
# ==============================================
## Train
if use_custom:
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu:0')
# Set the module-level globals consumed by the custom loss (train mode)
loss_mode = 'train'
x = copy.deepcopy(data_trn.x)
MI_x = copy.deepcopy(data_trn_MI)
## Set objective
a['obj'] = autogradObj
a['params']['disable_default_eval_metric'] = 1
# Remove parameters that are not used when a custom objective callable is supplied
del a['params']['eval_metric']
del a['params']['objective']
# -----------------
if epoch > 0: # Continue from the previous epoch model
a['xgb_model'] = model
if out_weights_on:
out_weights = copy.deepcopy(w_trn)
model = xgboost.train(**a)
if use_custom:
track_loss_train = copy.deepcopy(track_loss) # track_loss from custom loss
else:
train_loss = results['train'][model_param['eval_metric'][0]][0]
# ==============================================
## Validate
if epoch == 0 or ((epoch+1) % param['savemode']) == 0 or args['__raytune_running__']:
# ------- AUC values ------
if len(args['primary_classes']) >= 2:
preds_train = model.predict(dtrain)
if len(preds_train.shape) > 1: preds_train = preds_train[:, args['signal_class']]
metrics_train = aux.Metric(y_true=data_trn.y, y_pred=preds_train, weights=w_trn, class_ids=args['primary_classes'], hist=False, verbose=True)
preds_eval = model.predict(deval)
if len(preds_eval.shape) > 1: preds_eval = preds_eval[:, args['signal_class']]
metrics_eval = aux.Metric(y_true=data_val.y, y_pred=preds_eval, weights=w_val, class_ids=args['primary_classes'], hist=False, verbose=True)
# ------- Loss values ------
if use_custom:
if out_weights_on:
out_weights = copy.deepcopy(w_val)
# Set the module-level globals consumed by the custom loss (eval mode)
loss_mode = 'eval'
x = copy.deepcopy(data_val.x)
MI_x = copy.deepcopy(data_val_MI)
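# The objective call below is evaluated only for its side effect: it fills the
# global 'track_loss' dictionary, which is copied right after; its return value is discarded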
a['obj'](preds=preds_eval, targets=deval)[1] / len(data_val.x)
track_loss_eval = copy.deepcopy(track_loss) # track_loss from custom loss
else:
eval_loss = results['eval'][model_param['eval_metric'][0]][0] # Collect the value
if torch.cuda.is_available():
io.showmem_cuda()
else:
io.showmem()
# ==============================================
# Collect values
if use_custom:
optimize.trackloss(loss=track_loss_train, loss_history=loss_history_train)
optimize.trackloss(loss=track_loss_eval, loss_history=loss_history_eval)
trn_losses.append(loss_history_train['sum'][-1]) # For raytune
val_losses.append(loss_history_eval['sum'][-1])
else:
trn_losses.append(train_loss)
val_losses.append(eval_loss)
if len(args['primary_classes']) >= 2:
trn_aucs.append(metrics_train.auc)
val_aucs.append(metrics_eval.auc)
# ==============================================
if not args['__raytune_running__'] and param['tensorboard']:
#writer.add_scalar('lr', scheduler.get_last_lr()[0], epoch)
writer.add_scalar('loss/validation', val_losses[-1], epoch)
writer.add_scalar('loss/train', trn_losses[-1], epoch)
writer.add_scalar('AUC/validation', val_aucs[-1], epoch)
writer.add_scalar('AUC/train', trn_aucs[-1], epoch)
print(f'[{param["label"]}] Tree {epoch+1:03d}/{num_epochs:03d} | Train: loss = {trn_losses[-1]:0.4f}, AUC = {trn_aucs[-1]:0.4f} | Eval: loss = {val_losses[-1]:0.4f}, AUC = {val_aucs[-1]:0.4f}')
# Save the model after all the epochs (will contain all the trees)
if not args['__raytune_running__']:
savedir = aux.makedir(f'{args["modeldir"]}/{param["label"]}')
filename = f'{savedir}/{param["label"]}_{num_epochs-1}'
model.save_model(filename + '.json')
"""
model.save_model(filename + '.ubj')
model.dump_model(filename + '.text', dump_format='text')
"""
losses = {'trn_losses': trn_losses,
'val_losses': val_losses,
'trn_aucs': trn_aucs,
'val_aucs': val_aucs,
'loss_history_train': loss_history_train,
'loss_history_eval': loss_history_eval}
with open(filename + '.pkl', 'wb') as file:
data = {'model': model, 'ids': ids_trn, 'losses': losses, 'epoch': num_epochs-1, 'param': param}
print(f'Saving model and statistics to file: {filename}')
pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)
gc.collect() #!
# Report to Ray Tune only once, after all boost iterations;
# otherwise the scheduler may trigger early stopping, as it does with neural-net epochs
if args['__raytune_running__']:
#with tune.checkpoint_dir(epoch) as checkpoint_dir:
# path = os.path.join(checkpoint_dir, "checkpoint")
# pickle.dump(model, open(path, 'wb'))
ray.train.report({'loss': trn_losses[-1], 'AUC': val_aucs[-1]})
if not args['__raytune_running__']:
# Plot evolution
plotdir = aux.makedir(f'{args["plotdir"]}/train/loss/{param["label"]}')
if use_custom:
ltr = {f'train: {k}': v for k, v in loss_history_train.items()}
lev = {f'eval: {k}': v for k, v in loss_history_eval.items()}
losses_ = ltr | lev
else:
losses_ = {'train': trn_losses, 'eval': val_losses}
for yscale in ['linear', 'log']:
for xscale in ['linear', 'log']:
fig,ax = plots.plot_train_evolution_multi(losses=losses_, trn_aucs=trn_aucs, val_aucs=val_aucs,
label=param["label"], yscale=yscale, xscale=xscale)
plt.savefig(f"{plotdir}/{param['label']}_losses_yscale_{yscale}_xscale_{xscale}.pdf", bbox_inches='tight')
plt.close(fig)
## Plot feature importance
if plot_importance:
for sort in [True, False]:
for importance_type in ['weight', 'gain', 'cover', 'total_gain', 'total_cover']:
fig,ax = plots.plot_xgb_importance(model=model, tick_label=ids_trn,
label=param["label"], importance_type=importance_type, sort=sort)
targetdir = aux.makedir(f'{args["plotdir"]}/train/xgboost-importance/{param["label"]}')
plt.savefig(f'{targetdir}/{param["label"]}--type_{importance_type}--sort-{sort}.pdf', bbox_inches='tight');
plt.close(fig)
## Plot decision trees
if ('plot_trees' in param) and param['plot_trees']:
try:
print(f'Plotting decision trees ...')
model.feature_names = ids_trn # Make it explicit
path = aux.makedir(f'{args["plotdir"]}/train/xgboost-treeviz/{param["label"]}')
for i in tqdm(range(num_epochs)):
xgboost.plot_tree(model, num_trees=i)
fig = plt.gcf(); fig.set_size_inches(60, 20) # Higher reso
plt.savefig(f'{path}/tree_{i}.pdf', bbox_inches='tight')
plt.close()
except:
print(f'Could not plot the decision trees (try: conda install python-graphviz)')
#model.feature_names = None # Set original default ones
return model
return # No return value for raytune
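# Usage sketch (the function name and file path are hypothetical; nothing in this module calls it):
# reloading the artefact pickled by train_xgb above. The dictionary keys match those written there.
def _example_load_trained_model(filename: str):
    """Illustrative reload of a '<label>_<epoch>.pkl' file written by train_xgb."""
    with open(filename, 'rb') as f:
        data = pickle.load(f)
    return data['model'], data['ids'], data['losses'], data['param']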