# Source code for ixpeobssim.core.fitting

```
#!/usr/bin/env python
#
# Copyright (C) 2018--2021, the ixpeobssim team.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from __future__ import print_function, division
import numpy
from scipy.optimize import curve_fit
from ixpeobssim.utils.environment import SCIPY_VERSION
import ixpeobssim.core.modeling
from ixpeobssim.utils.logging_ import logger
# pylint: disable=invalid-name
USE_ABSOLUTE_SIGMA = True
[docs]
def fit(model, xdata, ydata, p0=None, sigma=None, xmin=-numpy.inf,
xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA, check_finite=True,
method=None, verbose=True, **kwargs):
"""Lightweight wrapper over the ``scipy.optimize.curve_fit()`` function
to take advantage of the modeling facilities. More specifically, in addition
to performing the actual fit, we update all the model parameters so that,
after the fact, we do have a complete picture of the fit outcome.
Parameters
----------
model : :py:class:`ixpeobssim.core.modeling.FitModelBase` instance callable
The model function, f(x, ...). It must take the independent
variable as the first argument and the parameters to fit as
separate remaining arguments.
xdata : array_like
The independent variable where the data is measured.
ydata : array_like
The dependent data --- nominally f(xdata, ...)
p0 : None, scalar, or sequence, optional
Initial guess for the parameters. If None, then the initial
values will all be 1.
sigma : None or array_like, optional
Uncertainties in `ydata`. If None, all the uncertainties are set to
1 and the fit becomes effectively unweighted.
xmin : float
The minimum value for the input x-values.
xmax : float
The maximum value for the input x-values.
absolute_sigma : bool, optional
If True, `sigma` is used in an absolute sense and the estimated
parameter covariance `pcov` reflects these absolute values.
If False, only the relative magnitudes of the `sigma` values matter.
The returned parameter covariance matrix `pcov` is based on scaling
`sigma` by a constant factor. This constant is set by demanding that the
reduced `chisq` for the optimal parameters `popt` when using the
*scaled* `sigma` equals unity.
method : {'lm', 'trf', 'dogbox'}, optional
Method to use for optimization. See `least_squares` for more details.
Default is 'lm' for unconstrained problems and 'trf' if `bounds` are
provided. The method 'lm' won't work when the number of observations
is less than the number of variables, use 'trf' or 'dogbox' in this
case.
verbose : bool
Print the model if True.
kwargs
Keyword arguments passed to `leastsq` for ``method='lm'`` or
`least_squares` otherwise.
"""
# Select data based on the x-axis range passed as an argument.
_mask = numpy.logical_and(xdata >= xmin, xdata <= xmax)
xdata = xdata[_mask]
ydata = ydata[_mask]
if len(xdata) <= len(model.parameters):
raise RuntimeError('Not enough data to fit (%d points)' % len(xdata))
if isinstance(sigma, numpy.ndarray):
sigma = sigma[_mask]
# If the model has a Jacobian defined, go ahead and use it.
try:
jac = model.jacobian
except:
jac = None
# If we are not passing default starting points for the model parameters,
# try and do something sensible.
if p0 is None:
model.init_parameters(xdata, ydata, sigma)
p0 = model.parameters
if verbose:
logger.debug('%s parameters initialized to %s.', model.name(), p0)
# If sigma is None, assume all the errors are 1. (If we don't do this,
# the code will crash when calculating the chisquare.
if sigma is None:
sigma = numpy.full((len(ydata), ), 1.)
# Do the actual fit---note the horrible hack to support scipy versions prior
# to 0.18, not supporting a user-defined jacobian.
if SCIPY_VERSION < '0.18':
logger.warning('Scipy version < 0.18 found, ignoring Jacobian...')
popt, pcov = curve_fit(model, xdata, ydata, p0, sigma, absolute_sigma,
check_finite, model.bounds, method, **kwargs)
else:
popt, pcov = curve_fit(model, xdata, ydata, p0, sigma, absolute_sigma,
check_finite, model.bounds, method, jac,
**kwargs)
# Update the model parameters.
model.set_plotting_range(xdata.min(), xdata.max())
model.parameters = popt
model.covariance_matrix = pcov
model.chisq = (((ydata - model(xdata))/sigma)**2).sum()
model.ndof = len(ydata) - len(model)
if verbose:
print(model)
return model
[docs]
def fit_histogram(model, histogram, p0=None, sigma=None, xmin=-numpy.inf,
xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
check_finite=True, method=None, verbose=True, **kwargs):
"""Fit a histogram to a given model.
This is basically calling :meth:`ixpeobssim.core.fitting.fit` with some
pre-processing to turn the histogram bin edges and content into
x-y data. Particularly, the bin centers are taken as the independent
data series, the bin contents are taken as the dependent data saries,
and the square root of the counts as the Poisson error.
For additional parameters look at the documentation of the
:meth:`ixpeobssim.core.fitting.fit`
Parameters
----------
model : :py:class:`ixpeobssim.core.modeling.FitModelBase` instance or
callable
The fit model.
histogram : ixpeHistogram1d instance
The histogram to be fitted.
Warning
-------
We're not quite doing the right thing, here, as we should integrate
the model within each histogram bin and compare that to the counts,
but this is not an unreasonable first-order approximation. We might want
to revise this, especially since we can probably provide an analytic
integral for most of the model we need.
"""
assert histogram.num_axes == 1
_mask = (histogram.content > 0)
xdata = histogram.bin_centers(0)[_mask]
ydata = histogram.content[_mask]
if sigma is None:
sigma = numpy.sqrt(ydata)
return fit(model, xdata, ydata, p0, sigma, xmin, xmax, absolute_sigma,
check_finite, method, verbose, **kwargs)
[docs]
def fit_modulation_curve(histogram, p0=None, sigma=None, xmin=-numpy.inf,
xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
check_finite=True, method=None, verbose=True,
degrees=True, **kwargs):
"""Fit a modulation curve.
For all the other parameters look at the documentation of the
:meth:`ixpeobssim.core.fitting.fit_histogram`
"""
if degrees:
model = ixpeobssim.core.modeling.xModulationCurveDeg()
else:
model = ixpeobssim.core.modeling.xModulationCurveRad()
return fit_histogram(model, histogram, p0, sigma, xmin, xmax,
absolute_sigma, check_finite, method, verbose,
**kwargs)
[docs]
def fit_gaussian_iterative(histogram, p0=None, sigma=None, xmin=-numpy.inf,
xmax=numpy.inf, absolute_sigma=USE_ABSOLUTE_SIGMA,
check_finite=True, method=None, verbose=True,
num_sigma_left=2., num_sigma_right=2.,
num_iterations=2, **kwargs):
"""Fit the core of a gaussian histogram within a given number of sigma
around the peak.
This function performs a first round of fit to the data and then
repeats the fit iteratively limiting the fit range to a specified
interval defined in terms of deviations (in sigma) around the peak.
For additional parameters look at the documentation of the
:meth:`ixpeobssim.core.fitting.fit_histogram`
Parameters
----------
num_sigma_left : float
The number of sigma on the left of the peak to be used to define the
fitting range.
num_sigma_right : float
The number of sigma on the right of the peak to be used to define the
fitting range.
num_iterations : int
The number of iterations of the fit.
"""
model = ixpeobssim.core.modeling.xGaussian()
fit_histogram(model, histogram, p0, sigma, xmin, xmax, absolute_sigma,
check_finite, method, verbose, **kwargs)
for i in range(num_iterations):
xmin = model.peak - num_sigma_left * model.sigma
xmax = model.peak + num_sigma_right * model.sigma
try:
fit_histogram(model, histogram, p0, sigma, xmin, xmax,
absolute_sigma, check_finite, method, verbose,
**kwargs)
except RuntimeError as e:
raise RuntimeError('%s after %d iteration(s)' % (e, i + 1))
return model
def _analytical_fit_base(x, y):
"""Basic common routine for performing analytical fits.
The basic idea is to have a framework to perform simple, un-weighted least-squares
fits (e.g., with linear or power-law models) that have a closed-form solution
and can be performed analytically. Note this is not computing fit errors, but
on the plus side it offers limited support for vectorized fits, which is
sometimes useful to perform extrapolation of noisy data. More precisely,
we support three cases:
* x and y are two one dimensional arrays of the same shape, in which case
we provide scalar fit parameters;
* x is a 1-dimensional array with shape (N,), and y is a 2-dimensional
arrays with shape (N, M), in which case we interpret the situation as M
fits where the independent data array is the same for all the data arrays
of the dependent variable, and an array of values of lenght M is returned
for each parameter;
* x and y are both 2-dimensional arrays with shape (N, M), which is
similar to the previous case, except for the fact that the M data
arrays for the independent variable are all different.
Arguments
---------
x : array_like
The x data, can be either one- (N) or two- (N, M) dimensional.
y : array_like
The y data, can be either one- (N) or two- (N, M) dimensional.
"""
# Make sure the input data are in the form on numpy arrays.
if not isinstance(x, numpy.ndarray):
x = numpy.array(x)
if not isinstance(y, numpy.ndarray):
y = numpy.array(y)
# Handle the common case where we pass a single array for x and multiple arrays
# for the y---in this case we simply tile the x with enough identical copies
# to match the y shape.
if x.shape != y.shape:
x = numpy.tile(x, (y.shape[1], 1)).T
# And, at this point, the shapes of x and y should really match.
assert x.shape == y.shape
n = x.shape[0]
return x, y, n
[docs]
def linear_analytical_fit(x, y, dy=None):
"""Simple vectorized, analytical linear fit.
See https://mathworld.wolfram.com/LeastSquaresFitting.html
"""
x, y, _ = _analytical_fit_base(x, y)
if dy is None:
dy = numpy.full(y.shape, 1.)
w = 1. / dy**2.
s0 = w.sum(axis=0)
sx = (x * w).sum(axis=0)
sx2 = (x**2. * w).sum(axis=0)
sy = (y * w).sum(axis=0)
sxy = (x * y * w).sum(axis=0)
D = s0 * sx2 - sx**2.
m = (s0 * sxy - sx * sy) / D
q = (sy * sx2 - sx * sxy) / D
return m, q
[docs]
def power_law_analytical_fit(x, y):
"""Simple vectorized, analytical linear fit.
See https://mathworld.wolfram.com/LeastSquaresFitting.html
"""
x, y, n = _analytical_fit_base(x, y)
logx, logy = numpy.log(x), numpy.log(y)
sx = logx.sum(axis=0)
sx2 = (logx**2.).sum(axis=0)
sy = logy.sum(axis=0)
sxy = (logx * logy).sum(axis=0)
index = (n * sxy - sx * sy) / (n * sx2 - sx**2.)
norm = numpy.exp((sy - index * sx) / n)
return norm, index
```