Python lmfitを使って、2次元ヒストグラムを2次元正規分布でフィッティングする (1次元も)

lmfit.github.io

pip3 install lmfit で lmfitをインストールする

コード

import numpy as np
import lmfit
from lmfit.lineshapes import gaussian2d, lorentzian
import matplotlib.pyplot as plt

npoints=10000
np.random.seed(0)
x = np.random.randn(npoints)*0.5 + 4 #標準偏差0.5, 平均4の正規分布
y = np.random.randn(npoints)*2   + 3 #標準偏差2.0, 平均3の正規分布

xrange=[0,10]
yrange=[-10,10]
bins=[50,100]
xbinw = (xrange[1]-xrange[0])/bins[0]
ybinw = (yrange[1]-yrange[0])/bins[1]

Z1, _, _ = np.histogram2d(x,y,bins=bins,range=[xrange,yrange])
Z1 = np.flipud(np.rot90(Z1)) # np.histogram2dの仕様上必要

X1, Y1 = np.meshgrid([xbinw*(n+0.5)+xrange[0] for n in range(bins[0])],
                   [ybinw*(n+0.5)+yrange[0] for n in range(bins[1])])

XYZ1 = np.array([X1.flatten(),Y1.flatten(),Z1.flatten()])

#3行目が0の列を削除
XYZ2 = XYZ1[:, XYZ1[2]>0]
arr_x=XYZ2[0]
arr_y=XYZ2[1]
arr_z=XYZ2[2]
arr_zerror = np.array(arr_z)**0.5

model = lmfit.models.Gaussian2dModel()
params = model.guess(arr_z, arr_x, arr_y)
result = model.fit(arr_z, x=arr_x, y=arr_y, params=params, weights=1.0/arr_zerror, scale_covar=False)
lmfit.report_fit(result)

print(result.best_values)

for mem in ['amplitude','centerx','sigmax','centery','sigmay']:
    print(mem+":", result.result.params[mem].value, result.result.params[mem].stderr)

print("chisq:",result.chisqr)
print("nfree:",result.nfree)

出力例

[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 38
    # data points      = 735
    # variables        = 5
    chi-square         = 723.976975
    reduced chi-square = 0.99174928
    Akaike info crit   = -1.10651903
    Bayesian info crit = 21.8928335
[[Variables]]
    amplitude:  375.869366 +/- 3.90951889 (1.04%) (init = 130.0867)
    centerx:    3.98925246 +/- 0.00514187 (0.13%) (init = 3.9)
    centery:    2.99002444 +/- 0.02071821 (0.69%) (init = 2.5)
    sigmax:     0.48257413 +/- 0.00397600 (0.82%) (init = 0.6333333)
    sigmay:     1.94203401 +/- 0.01613356 (0.83%) (init = 2.633333)
    fwhmx:      1.13637522 +/- 0.00936277 (0.82%) == '2.3548200*sigmax'
    fwhmy:      4.57314052 +/- 0.03799164 (0.83%) == '2.3548200*sigmay'
    height:     629.993300 +/- 9.71437855 (1.54%) == '1.5707963*amplitude/(max(1e-15, sigmax)*max(1e-15, sigmay))'
{'amplitude': 375.8693656732538, 'centerx': 3.989252456029961, 'centery': 2.990024444790775, 'sigmax': 0.48257413312606534, 'sigmay': 1.9420340083849474}
amplitude: 375.8693656732538 3.909518889796926
centerx: 3.989252456029961 0.00514187494937854
sigmax: 0.48257413312606534 0.003976003640136
centery: 2.990024444790775 0.02071821073133753
sigmay: 1.9420340083849474 0.016133562261156396
chisq: 723.9769746884517
nfree: 730

グラフ化のコード

import matplotlib
cdict = {'red': ((0.0, 1, 1), (1e-20, 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89, 1, 1), (1.0, 0.5, 0.5)), 
         'green': ((0.0, 1, 1), (1e-20, 0, 0), (0.125, 0, 0), (0.375, 1, 1), (0.64, 1, 1), (0.91, 0, 0), (1.0, 0, 0)), 
         'blue': ((0.0, 1, 1), (1e-20, 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65, 0, 0), (1.0, 0, 0))}

cmap = matplotlib.colors.LinearSegmentedColormap('jet2', cdict)


arr_fit = model.func(X1, Y1, **result.best_values)
plt.pcolor(X1, Y1, arr_fit, vmin=0, vmax=np.max(arr_fit)*1.2, shading='auto',cmap=cmap)
plt.title("fit")
plt.show()

plt.hist2d(x, y,range=[xrange,yrange],bins=bins,vmin=0,vmax=np.max(arr_fit)*1.2,cmap=cmap)
plt.show()

phst.hateblo.jp

ここで紹介したscipy.optimize.curve_fitを使った方法と結果を比較する。全く同じデータを使いたいので、以下のデータをダウンロードする。

ガウス分布のフィッティング用のサンプルデータ Sample data

import numpy as np
import lmfit
from lmfit.models import GaussianModel


arr_x = []
arr_y = []
arr_yerror = []

filename = ("gaus_sample.txt")
for line in open(filename, 'r'): #np.loadtxt等を使ったもっと簡潔な書き方があるはず
    item_list = line.split()
    if float(item_list[1]) != 0: # N=0 は排除
        arr_x.append(float(item_list[0]))
        arr_y.append(float(item_list[1]))
        arr_yerror.append(float(item_list[1]) ** 0.5) # 誤差はsqrt(N)

arr_x = np.array(arr_x)
arr_y = np.array(arr_y)
arr_yerror = np.array(arr_yerror)
mod = GaussianModel()

pars = mod.guess(arr_y, x=arr_x)
result = mod.fit(arr_y, pars, x=arr_x, weights=1/arr_yerror, scale_covar=False)
result.plot_fit()

print("Chi2 {:.6f}".format(result.chisqr))
print("          Estimate  Std. error")
params = result.result.params
print("Constant {:.6f}   {:.6f}".format(params["height"].value, params["height"].stderr))
print("Mean      {:.6f}   {:.6f}".format(params["center"].value, params["center"].stderr))
print("Sigma      {:.6f}   {:.6f}".format(params["sigma"].value, params["sigma"].stderr))

lmfit.report_fit(result)

lmfitで計算した出力は

Chi2 83.641822
          Estimate  Std. error
Constant 402.530663   4.963102
Mean      -0.003979   0.009894
Sigma      0.983115   0.007064

scipy.optimize.curve_fitの結果

Chi2 83.641822
            Estimate  Std. error
Constant  402.530641    4.963109
Mean       -0.003979    0.009894
Sigma       0.983115    0.007064

ROOT5の結果は

Constant 402.531 +/- 4.95816
Mean     -0.00397889 +/- 0.00989405
Sigma    0.983115 +/- 0.00704207
Chi2     83.6418

scale_covar=False をつけると、誤差も含めてROOT5やscipy.optimize.curve_fitとほぼ一致した。オプションの解説を読むと

scale_covar (bool, optional) – Whether to automatically scale the covariance matrix when calculating uncertainties (default is True).

とある。つまり、誤差を計算する時、分散共分散行列を自動的にスケールさせないときはscale_covar=Falseとする。scipyのcurve_fitでは、同等の機能は absolute_sigmabool=True で機能する。解説には以下のように書いてある。

absolute_sigmabool, optional
If True, sigma is used in an absolute sense and the estimated parameter covariance pcov reflects these absolute values.
If False (default), only the relative magnitudes of the sigma values matter.

真偽が逆になっていて分かりにくいが、スケールさせないscale_covar=False 、絶対値を使うabsolute_sigmabool=Trueを使うことを忘れないようにしたい。

関数としてまとめた

import numpy as np
import lmfit
from lmfit.models import GaussianModel

def fit_gauss(xs, ys, report=False):
    arr_x=[]
    arr_y=[]
    arr_yerror=[]
    for x,y in zip(xs,ys):
        if y<=0:continue
        arr_x.append(x)
        arr_y.append(y)
        arr_yerror.append(y**0.5)
    
    arr_x = np.array(arr_x)
    arr_y = np.array(arr_y)
    arr_yerror = np.array(arr_yerror)
    mod = GaussianModel()

    pars = mod.guess(arr_y, x=arr_x)
    result = mod.fit(arr_y, pars, x=arr_x, weights=1/arr_yerror, scale_covar=False)

    if report:
        result.plot_fit()
        print("Chi2 {:.6f}".format(result.chisqr))
        print("          Estimate  Std. error")
        params = result.result.params
        print("Constant {:.6f}   {:.6f}".format(params["height"].value, params["height"].stderr))
        print("Mean      {:.6f}   {:.6f}".format(params["center"].value, params["center"].stderr))
        print("Sigma      {:.6f}   {:.6f}".format(params["sigma"].value, params["sigma"].stderr))
        lmfit.report_fit(result)
    return result