# Source code for ixpeobssim.core.fitting

#!/usr/bin/env python
#
# Copyright (C) 2018--2021, the ixpeobssim team.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from __future__ import print_function, division


import numpy
from scipy.optimize import curve_fit

from ixpeobssim.utils.environment import SCIPY_VERSION
import ixpeobssim.core.modeling
from ixpeobssim.utils.logging_ import logger

# pylint: disable=invalid-name


USE_ABSOLUTE_SIGMA = True


def fit(model, xdata, ydata, p0=None, sigma=None, xmin=-numpy.inf,
        xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA, check_finite=True,
        method=None, verbose=True, **kwargs):
    """Lightweight wrapper over the ``scipy.optimize.curve_fit()`` function
    to take advantage of the modeling facilities. More specifically, in
    addition to performing the actual fit, we update all the model parameters
    so that, after the fact, we do have a complete picture of the fit outcome.

    Parameters
    ----------
    model : :py:class:`ixpeobssim.core.modeling.FitModelBase` instance callable
        The model function, f(x, ...). It must take the independent variable
        as the first argument and the parameters to fit as separate remaining
        arguments.

    xdata : array_like
        The independent variable where the data is measured.

    ydata : array_like
        The dependent data --- nominally f(xdata, ...)

    p0 : None, scalar, or sequence, optional
        Initial guess for the parameters. If None, then the initial values
        will all be 1.

    sigma : None or array_like, optional
        Uncertainties in `ydata`. If None, all the uncertainties are set to
        1 and the fit becomes effectively unweighted.

    xmin : float
        The minimum value for the input x-values.

    xmax : float
        The maximum value for the input x-values.

    absolute_sigma : bool, optional
        If True, `sigma` is used in an absolute sense and the estimated
        parameter covariance `pcov` reflects these absolute values. If False,
        only the relative magnitudes of the `sigma` values matter. The
        returned parameter covariance matrix `pcov` is based on scaling
        `sigma` by a constant factor. This constant is set by demanding that
        the reduced `chisq` for the optimal parameters `popt` when using the
        *scaled* `sigma` equals unity.

    check_finite : bool, optional
        Passed verbatim to ``curve_fit()``; if True, check that the input
        arrays do not contain nans of infs.

    method : {'lm', 'trf', 'dogbox'}, optional
        Method to use for optimization. See `least_squares` for more details.
        Default is 'lm' for unconstrained problems and 'trf' if `bounds` are
        provided. The method 'lm' won't work when the number of observations
        is less than the number of variables, use 'trf' or 'dogbox' in this
        case.

    verbose : bool
        Print the model if True.

    kwargs
        Keyword arguments passed to `leastsq` for ``method='lm'`` or
        `least_squares` otherwise.
    """
    # Select data based on the x-axis range passed as an argument.
    _mask = numpy.logical_and(xdata >= xmin, xdata <= xmax)
    xdata = xdata[_mask]
    ydata = ydata[_mask]
    if len(xdata) <= len(model.parameters):
        raise RuntimeError('Not enough data to fit (%d points)' % len(xdata))
    if isinstance(sigma, numpy.ndarray):
        sigma = sigma[_mask]
    # If the model has a Jacobian defined, go ahead and use it. Note we only
    # catch AttributeError, here---a bare except would mask any genuine error
    # raised while accessing the attribute.
    try:
        jac = model.jacobian
    except AttributeError:
        jac = None
    # If we are not passing default starting points for the model parameters,
    # try and do something sensible.
    if p0 is None:
        model.init_parameters(xdata, ydata, sigma)
        p0 = model.parameters
        if verbose:
            logger.debug('%s parameters initialized to %s.', model.name(), p0)
    # If sigma is None, assume all the errors are 1. (If we don't do this,
    # the code will crash when calculating the chisquare.)
    if sigma is None:
        sigma = numpy.full((len(ydata), ), 1.)
    # Do the actual fit---note the horrible hack to support scipy versions prior
    # to 0.18, not supporting a user-defined jacobian.
    if SCIPY_VERSION < '0.18':
        logger.warning('Scipy version < 0.18 found, ignoring Jacobian...')
        popt, pcov = curve_fit(model, xdata, ydata, p0, sigma, absolute_sigma,
                               check_finite, model.bounds, method, **kwargs)
    else:
        popt, pcov = curve_fit(model, xdata, ydata, p0, sigma, absolute_sigma,
                               check_finite, model.bounds, method, jac, **kwargs)
    # Update the model parameters and the fit-quality bookkeeping.
    model.set_plotting_range(xdata.min(), xdata.max())
    model.parameters = popt
    model.covariance_matrix = pcov
    model.chisq = (((ydata - model(xdata)) / sigma)**2).sum()
    model.ndof = len(ydata) - len(model)
    if verbose:
        print(model)
    return model
def fit_histogram(model, histogram, p0=None, sigma=None, xmin=-numpy.inf,
                  xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
                  check_finite=True, method=None, verbose=True, **kwargs):
    """Fit a histogram to a given model.

    This is basically calling :meth:`ixpeobssim.core.fitting.fit` with some
    pre-processing to turn the histogram bin edges and content into x-y data.
    Particularly, the bin centers are taken as the independent data series,
    the bin contents are taken as the dependent data series, and the square
    root of the counts as the Poisson error.

    For additional parameters look at the documentation of the
    :meth:`ixpeobssim.core.fitting.fit`

    Parameters
    ----------
    model : :py:class:`ixpeobssim.core.modeling.FitModelBase` instance or callable
        The fit model.

    histogram : ixpeHistogram1d instance
        The histogram to be fitted.

    Warning
    -------
    We're not quite doing the right thing, here, as we should integrate the
    model within each histogram bin and compare that to the counts, but this
    is not an unreasonable first-order approximation. We might want to revise
    this, especially since we can probably provide an analytic integral for
    most of the model we need.
    """
    # Only one-dimensional histograms can be fitted this way.
    assert histogram.num_axes == 1
    # Drop the empty bins, whose Poisson error would otherwise be zero and
    # make the chisquare ill-defined.
    nonempty = histogram.content > 0
    xdata = histogram.bin_centers(0)[nonempty]
    ydata = histogram.content[nonempty]
    if sigma is None:
        sigma = numpy.sqrt(ydata)
    return fit(model, xdata, ydata, p0, sigma, xmin, xmax, absolute_sigma,
               check_finite, method, verbose, **kwargs)
def fit_modulation_curve(histogram, p0=None, sigma=None, xmin=-numpy.inf,
                         xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
                         check_finite=True, method=None, verbose=True,
                         degrees=True, **kwargs):
    """Fit a modulation curve.

    The underlying model is picked among the degree- and radian-based
    modulation-curve models according to the ``degrees`` flag.

    For all the other parameters look at the documentation of the
    :meth:`ixpeobssim.core.fitting.fit_histogram`
    """
    if degrees:
        model_class = ixpeobssim.core.modeling.xModulationCurveDeg
    else:
        model_class = ixpeobssim.core.modeling.xModulationCurveRad
    return fit_histogram(model_class(), histogram, p0, sigma, xmin, xmax,
                         absolute_sigma, check_finite, method, verbose, **kwargs)
def fit_gaussian_iterative(histogram, p0=None, sigma=None, xmin=-numpy.inf,
                           xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
                           check_finite=True, method=None, verbose=True,
                           num_sigma_left=2., num_sigma_right=2.,
                           num_iterations=2, **kwargs):
    """Fit the core of a gaussian histogram within a given number of sigma
    around the peak.

    This function performs a first round of fit to the data and then repeats
    the fit iteratively limiting the fit range to a specified interval defined
    in terms of deviations (in sigma) around the peak.

    For additional parameters look at the documentation of the
    :meth:`ixpeobssim.core.fitting.fit_histogram`

    Parameters
    ----------
    num_sigma_left : float
        The number of sigma on the left of the peak to be used to define the
        fitting range.

    num_sigma_right : float
        The number of sigma on the right of the peak to be used to define the
        fitting range.

    num_iterations : int
        The number of iterations of the fit.
    """
    model = ixpeobssim.core.modeling.xGaussian()
    # First pass over the full (xmin, xmax) range to get a starting estimate
    # of the peak position and width.
    fit_histogram(model, histogram, p0, sigma, xmin, xmax, absolute_sigma,
                  check_finite, method, verbose, **kwargs)
    # Iteratively shrink the fit range around the current best-fit peak.
    for iteration in range(1, num_iterations + 1):
        xmin = model.peak - num_sigma_left * model.sigma
        xmax = model.peak + num_sigma_right * model.sigma
        try:
            fit_histogram(model, histogram, p0, sigma, xmin, xmax,
                          absolute_sigma, check_finite, method, verbose,
                          **kwargs)
        except RuntimeError as exception:
            raise RuntimeError('%s after %d iteration(s)' % (exception, iteration))
    return model
def _analytical_fit_base(x, y): """Basic common routine for performing analytical fits. The basic idea is to have a framework to perform simple, un-weighted least-squares fits (e.g., with linear or power-law models) that have a closed-form solution and can be performed analytically. Note this is not computing fit errors, but on the plus side it offers limited support for vectorized fits, which is sometimes useful to perform extrapolation of noisy data. More precisely, we support three cases: * x and y are two one dimensional arrays of the same shape, in which case we provide scalar fit parameters; * x is a 1-dimensional array with shape (N,), and y is a 2-dimensional arrays with shape (N, M), in which case we interpret the situation as M fits where the independent data array is the same for all the data arrays of the dependent variable, and an array of values of lenght M is returned for each parameter; * x and y are both 2-dimensional arrays with shape (N, M), which is similar to the previous case, except for the fact that the M data arrays for the independent variable are all different. Arguments --------- x : array_like The x data, can be either one- (N) or two- (N, M) dimensional. y : array_like The y data, can be either one- (N) or two- (N, M) dimensional. """ # Make sure the input data are in the form on numpy arrays. if not isinstance(x, numpy.ndarray): x = numpy.array(x) if not isinstance(y, numpy.ndarray): y = numpy.array(y) # Handle the common case where we pass a single array for x and multiple arrays # for the y---in this case we simply tile the x with enough identical copies # to match the y shape. if x.shape != y.shape: x = numpy.tile(x, (y.shape[1], 1)).T # And, at this point, the shapes of x and y should really match. assert x.shape == y.shape n = x.shape[0] return x, y, n
def linear_analytical_fit(x, y, dy=None):
    """Simple vectorized, analytical linear fit.

    See https://mathworld.wolfram.com/LeastSquaresFitting.html
    """
    x, y, _ = _analytical_fit_base(x, y)
    # With no errors provided the fit is effectively unweighted, i.e., all
    # the weights are 1.
    if dy is None:
        weights = numpy.ones(y.shape)
    else:
        weights = 1. / dy**2.
    # Accumulate the weighted sums entering the closed-form solution.
    s0 = weights.sum(axis=0)
    sx = (weights * x).sum(axis=0)
    sx2 = (weights * x**2.).sum(axis=0)
    sy = (weights * y).sum(axis=0)
    sxy = (weights * x * y).sum(axis=0)
    delta = s0 * sx2 - sx**2.
    slope = (s0 * sxy - sx * sy) / delta
    intercept = (sy * sx2 - sx * sxy) / delta
    return slope, intercept
def power_law_analytical_fit(x, y):
    """Simple vectorized, analytical power-law fit.

    The fit is performed by linearizing the problem, i.e., least-squares
    fitting log(y) vs. log(x) with a straight line, see
    https://mathworld.wolfram.com/LeastSquaresFitting.html

    Note all the x and y values should be strictly positive, as both series
    are passed through ``numpy.log()``.

    Arguments
    ---------
    x : array_like
        The x data, can be either one- (N) or two- (N, M) dimensional.

    y : array_like
        The y data, can be either one- (N) or two- (N, M) dimensional.

    Returns
    -------
    norm, index : the best-fit parameters of the norm * x**index model.
    """
    x, y, n = _analytical_fit_base(x, y)
    logx, logy = numpy.log(x), numpy.log(y)
    sx = logx.sum(axis=0)
    sx2 = (logx**2.).sum(axis=0)
    sy = logy.sum(axis=0)
    sxy = (logx * logy).sum(axis=0)
    index = (n * sxy - sx * sy) / (n * sx2 - sx**2.)
    norm = numpy.exp((sy - index * sx) / n)
    return norm, index