Source code for icefit.lognormal

# Log-normal density parameters & generation
#
# Run with: python lognormal.py
#
# m.mieskolainen@imperial.ac.uk, 2024

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fsolve

def lognormal_param(m, v, match_mode='mean', verbose=True):
    r""" Compute log-normal distribution \mu and \sigma parameters from the target mean (or median or mode) m and variance v

    \mu and \sigma follow the Wikipedia convention:
    https://en.wikipedia.org/wiki/Log-normal_distribution

    Args:
        m          : mean (or median or mode) value of the target log-normal pdf
        v          : variance value of the target log-normal pdf
        match_mode : input m as 'mean', 'median', 'mode', or 'geometric'
    """
    if   match_mode == 'mean':
        # CLOSED FORM SOLUTION
        mu    = np.log(m**2/np.sqrt(v + m**2))
        sigma = np.sqrt(np.log(v/m**2 + 1))

    elif match_mode == 'median':
        # CLOSED FORM SOLUTION
        mu    = np.log(m)
        sigma = np.sqrt(np.log((np.sqrt(m**2 + 4*v)/m + 1)/2))

    elif match_mode == 'mode':
        # NUMERICAL SOLUTION
        def mode_func(x):
            # x = (mu, sigma^2)
            d = np.array([m - np.exp(x[0] - x[1]),
                          v - (np.exp(x[1]) - 1)*np.exp(2*x[0] + x[1])])
            return d

        init_guess = (np.log(m), np.log(1 + np.sqrt(v)/m))
        sol   = fsolve(mode_func, init_guess)
        mu    = sol[0]
        sigma = np.sqrt(sol[1])

    elif match_mode == 'geometric':
        # CLOSED FORM SOLUTION
        mu    = np.log(m)                 # m is the median and also the geometric mean
        sigma = np.log(1 + np.sqrt(v)/m)  # from log(s_G), where s_G = exp(STD(log(X)))

    else:
        raise Exception(f'lognormal_param: Unknown match_mode {match_mode}')

    if verbose:
        print(f'lognormal_param:')
        print(f' - Input: m = {m:0.3f}, v = {v:0.3f}, sqrt[v] = {np.sqrt(v):0.3f}, sqrt[v] / m = {np.sqrt(v)/m:0.3f}, target mode = {match_mode}')
        print(f' - Obtained parameters: mu = {mu:0.3f}, sigma = {sigma:0.3f}, exp(sigma) = {np.exp(sigma):0.3f}')

    return mu, sigma

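# Illustrative usage sketch (added for clarity, not part of the original module):
# a quick numerical check of the 'mean' matching closed form. Sampling
# X = exp(mu + sigma*Z) with Z ~ N(0,1) should reproduce the requested mean m
# and variance v; the values m=1.5, v=0.36 are arbitrary examples.
def _example_lognormal_param():
    m, v = 1.5, 0.36
    mu, sigma = lognormal_param(m=m, v=v, match_mode='mean', verbose=False)
    X = np.exp(mu + sigma * np.random.randn(int(1e6)))
    print(f'sample mean = {np.mean(X):0.3f} (target {m}), sample var = {np.var(X):0.3f} (target {v})')
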
def rand_lognormal(m, v, N, match_mode='mean'):
    """ Generate random numbers from a log-normal density with the target
    distribution mean (or median or mode) m and variance v

    Args:
        m          : mean (or median or mode) value of the target log-normal pdf
        v          : variance value of the target log-normal pdf
        N          : number of samples
        match_mode : input m as 'mean', 'median', 'mode', or 'geometric'
    """
    mu, sigma = lognormal_param(m=m, v=v, match_mode=match_mode)

    # Standard transform: X = exp(mu + sigma*Z), with Z ~ N(0,1)
    y = np.exp(mu + sigma * np.random.randn(N))

    return y

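# Illustrative usage sketch (added for clarity, not part of the original module):
# with 'median' matching the sample median approaches m, while the sample mean
# is pulled above m by the right-skewed tail of the log-normal.
def _example_rand_lognormal():
    X = rand_lognormal(m=1.5, v=0.36, N=int(1e6), match_mode='median')
    print(f'median = {np.median(X):0.3f} (target 1.5), mean = {np.mean(X):0.3f}')
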
def rand_powexp(m, std, N):
    """ Power+exp type parametrization for median matching

    Args:
        m   : median target
        std : standard deviation target
        N   : number of samples
    """
    theta = np.random.randn(N)  # theta ~ N(0,1), i.e. density ~ exp(-0.5 * theta^2)

    return m*(1 + std/m)**theta

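# Illustrative usage sketch (added for clarity, not part of the original module):
# rand_powexp matches the median exactly by construction, while its standard
# deviation only approximates the target std, with the accuracy degrading as
# std/m grows (quantified in test_accuracy below).
def _example_rand_powexp():
    X = rand_powexp(m=1.5, std=0.3, N=int(1e6))
    print(f'median = {np.median(X):0.3f} (target 1.5), std = {np.std(X):0.3f} (target ~0.3)')
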
def create_label(X):
    """ Create a summary-statistics label string for the sample array X """
    mean = np.mean(X)
    med  = np.median(X)
    std  = np.std(X)
    gsd  = np.exp(np.std(np.log(X)))  # Geometric standard deviation

    # Find histogram mode
    bins = np.linspace(0, 5*med, 300)
    counts, xval = np.histogram(X, bins)
    mode = xval[np.argmax(counts)]

    return (f'(mean,med,mode) = ({mean:0.1f}, {med:0.1f}, {mode:0.1f}), '
            f'STD[X] = {std:0.2f}, GSD[X] = {gsd:0.2f}, '
            f'std/mean = {std/mean:0.1f}, std/med = {std/med:0.1f}, std/mode = {std/mode:0.1f}')

def test_lognormal():
    """ Visual test of log-normal and power-exp sampling with different matching modes """

    N = int(1e7)

    # Nominal value
    m0 = 1.5

    # Different relative uncertainties
    delta_val = np.array([0.1, 0.4, 0.8, 1.6])

    # Different input modes
    modes  = ['mean', 'median', 'mode', 'geometric']
    symbol = ['\\bar{{m}}', 'm', '\\tilde{{m}}', 'm']

    fig, ax = plt.subplots(nrows=len(delta_val), ncols=len(modes), figsize=(20,15))

    for i in range(len(delta_val)):

        # log-normal pdf targets
        delta = delta_val[i]
        std   = delta*m0

        # 2. Generate power-approx formula distributed variables
        Y = rand_powexp(m=m0, std=std, N=N)
        Y_label = create_label(Y)

        for j in range(len(modes)):

            # 1. Generate log-normally distributed variables
            X = rand_lognormal(m=m0, v=std**2, N=N, match_mode=modes[j])
            X_label = create_label(X)

            # Print sample point statistics
            print(f'** {modes[j]} matching: m_0 = {m0:0.1f}, \\delta = {delta:0.2f} **')
            print(f'{X_label} \n')

            title = f'$({symbol[j]} = {m0:0.1f}, \\sqrt{{v}}/{symbol[j]} = {delta:0.1f})$'
            ax[i,j].set_title(f'<{modes[j]}> matching: {title}', fontsize=9)

            # ---------------
            bins = np.linspace(0, 5*np.median(X), 200)

            ax[i,j].hist(X, bins, label=f'E: {X_label}', histtype='step', density=True)
            ax[i,j].hist(Y, bins, label=f'A: {Y_label}', histtype='step', density=True)

            ax[i,j].legend(fontsize=4, loc='lower right')
            ax[i,j].set_xlim([0, m0*2.5])
            ax[i,j].set_ylim([0, None])

            if i == len(delta_val) - 1:
                ax[i,j].set_xlabel('$x$')

        print('----------------------------------------------------------')

    figname = 'sampling.pdf'
    print(f'Saving figure: {figname}')
    plt.savefig(figname, bbox_inches='tight')
    plt.close()

def test_accuracy():
    """ Test the accuracy of the power-exp approximation for median matching """

    def expanded_s(v, m, order=1):
        if order >= 1:
            A = np.sqrt(v)/m
        if order >= 2:
            A += (1/2)*(v/m**2)
        if order >= 3:
            A += -(7/12)*(v**(3/2)/m**3)
        if order >= 4:
            A += -(17/24)*(v**2/m**4)
        if order >= 5:
            A += (163/160)*(v**(5/2)/m**5)
        if order >= 6:
            A += (1111/720)*(v**3/m**6)
        if order >= 7:
            A += -(96487/40320)*(v**(7/2)/m**7)
        return A

    def exact_s(v, m):
        return np.exp(np.sqrt(np.log(1/2 * (np.sqrt(4*v/m**2 + 1) + 1)))) - 1

    # Fixed m value
    m = 1.5
    v = np.linspace(1e-9, m*4, 10000)

    fig, ax = plt.subplots()
    plt.plot(np.sqrt(v) / m, np.ones(len(v)), color=(0.5,0.5,0.5), ls='--')

    for i in range(1, 7+1):
        if i % 2:
            txt = f'$O(v^{{{i:.0f}/2}} / m^{{{i:.0f}}})$'
        else:
            txt = f'$O(v^{{{i/2:.0f}}} / m^{{{i:.0f}}})$'

        plt.plot(np.sqrt(v) / m, expanded_s(v=v, m=m, order=i) / exact_s(v=v, m=m), label=f'{txt}')

    plt.ylabel('[Expansion of $s$ $|_{near \\;\\; \\sqrt{v}/m=0}$] / Exact $s$')
    plt.xlabel('$\\sqrt{v} / m$')
    plt.legend()
    plt.xlim([0, 1.5])
    plt.ylim([0.8, 1.5])

    figname = 'series_approx.pdf'
    print(f'Saving figure: {figname}')
    plt.savefig(figname, bbox_inches='tight')
    plt.close()

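# Reasoning sketch (added for clarity, not part of the original module): writing
# X = m*(1 + s)**theta with theta ~ N(0,1) gives log(X) = log(m) + theta*log(1 + s),
# i.e. a log-normal with mu = log(m) and sigma = log(1 + s). Matching the target
# median m and variance v (see the 'median' branch of lognormal_param) requires
# sigma = sqrt(log((sqrt(4*v/m**2 + 1) + 1)/2)), so the exact scale factor is
# s = exp(sigma) - 1, which is what exact_s() evaluates; expanded_s() appears to be
# its series expansion in powers of sqrt(v)/m around zero.
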
def main():
    test_accuracy()
    test_lognormal()

if __name__ == "__main__":
    main()