In some scenarios, for instance, in Xilinx’s Vivado HLS, we may need to write our own curve fitting function as such function is not built-in and external library may not be quick enough to handle our needs.
In this article, A rapid Guassian fitting method suggested by Jacquelin will be introduced. In my case, it is around 200x faster compared to the python’s lmfit library.
The model used in Jacquelin’s article is:
Its peak can be calculated using these formulas:
However, the calculation of the sigma was incorrect in Jacquelin’s article written in 2014 and is amended later. If you are interested, please have a look of the original article. The updated one here. Correct formulas used in this article:
The data set below will be used:
The python code using lmit library fitting a Gaussian model to the data set:
import numpy as np
from lmfit.models import GaussianModel
import matplotlib.pyplot as plt
import time
ydata = np.array([2.727050781,2.749023438,2.951660156,3.276367188,3.286132813,3.337402344,3.286132813,3.115234375,3.032226563,2.961425781,2.785644531])
xdata = np.array(range(len(ydata)))
xdata = xdata+1 #x starts from position 1
gmodel = GaussianModel()
start_time = time.time()
params = gmodel.make_params(amplitude=ydata.max(),
center=xdata.mean(),
sigma=xdata.std())
result = gmodel.fit(ydata, params, x=xdata)
end_time = time.time() - start_time
print("elapsed time: ",end_time)
print(result.fit_report())
plt.scatter(xdata,ydata,label='data points')
plt.plot(xdata, result.best_fit, 'r-')
plt.show()
We will get
elapsed time: 0.0069561004638671875
center: 6.20838810 +/- 0.17609727 (2.84%) (init = 6)
sigma: 7.97128486 +/- 0.49022793 (6.15%) (init = 3.162278)
The python code fitting a Gaussian model to the data set using Jacquelin’s method [Repeating it for 100 times]:
import time
import math
import numpy as np
ydata = np.array([2.727050781,2.749023438,2.951660156,3.276367188,3.286132813,3.337402344,3.286132813,3.115234375,3.032226563,2.961425781,2.785644531])
xdata = np.array(range(len(ydata)))
xdata = xdata+1 #x starts from position 1
delta = 0
Sk2 = 0
Tk2 = 0
SkTk = 0
delSk = 0
delTk = 0
Sk = [0]*100
Tk = [0]*100
i = 0
start_time2 = time.time()
while i < 100:
delta = 0
Sk2 = 0
Tk2 = 0
SkTk = 0
delSk = 0
delTk = 0
Sk = [0]*100
Tk = [0]*100
j = 1
while (j <= 11 - 1):
Sk[j] = Sk[j-1] + 0.5 * (ydata[j] + ydata[j - 1]) # you may need to multipy 0.5 * (data[j] + data[j - 1]) by (xk - xk_1) if xk - xk_1 != 1
Tk[j] = Tk[j-1] + 0.5 * ((j + 1) * ydata[j] + (j) * ydata[j - 1]) # you may need to multipy 0.5 * ((j + 1) * data[j] + (j) * data[j - 1]) by (xk - xk_1) if xk - xk_1 != 1
temp = Sk[j]
temp2 = Tk[j]
Sk2 += pow(temp,2)
Tk2 += pow(temp2,2)
SkTk += temp * temp2
delta = ydata[j] - ydata[0]
delSk += delta * temp
delTk += delta * temp2
j+=1
det = Sk2 * Tk2 - pow(SkTk,2);
A = Tk2 / det * delSk - SkTk / det * delTk;
B = - SkTk / det * delSk + Sk2 / det * delTk;
peak = -A/B;
sigma = math.sqrt(-1/B)#you may comment sigma out if you just want to use this fucntion for peak search only
i+=1
end_time2 = time.time() - start_time2
print("elapsed time: ",end_time2)
print("peak = ",peak)
print("sigma = ",sigma)
We will get
elapsed time: 0.0029921531677246094
peak = 6.106384024490814
sigma = 8.332941896208029
which is similar to the lmfit library.
The C program fitting a Gaussian model to the data set using Jacquelin’s method:
#include <stdio.h>
#include <math.h>
#include <time.h>
#define data_size 12
double data[data_size] = {2.72705078125000,2.74902343750000,2.95166015625000,3.27636718750000,3.28613281250000,3.33740234375000,3.28613281250000,3.11523437500000,3.03222656250000,2.96142578125000,2.78564453125000};
double delta = 0;
double Sk2 = 0;
double Tk2 = 0;
double SkTk = 0;
double delSk = 0;
double delTk = 0;
double Sk[100] = {0};
double Tk[100] = {0};
double temp = 0; //it will be used to store Sk[j]
double temp2 = 0; //it will be used to store Tk[j]
double det = 0;
double A = 0;
double B = 0;
double peak = 0;
double sigma = 0;
int main()
{
clock_t t;
t = clock();
for(int j = 1; j < data_size - 1; j++)
{
Sk[j] = Sk[j-1] + 0.5 * (data[j] + data[j - 1]); // you may need to multipy 0.5 * (data[j] + data[j - 1]) by (xk - xk_1) if xk - xk_1 != 1
Tk[j] = Tk[j-1] + 0.5 * ((j + 1) * data[j] + (j) * data[j - 1]); // you may need to multipy 0.5 * ((j + 1) * data[j] + (j) * data[j - 1]) by (xk - xk_1) if xk - xk_1 != 1
temp = Sk[j];
temp2 = Tk[j];
Sk2 += pow(temp,2);
Tk2 += pow(temp2,2);
SkTk += temp * temp2;
delta = data[j] - data[0];
delSk += delta * temp;
delTk += delta * temp2;
}
det = Sk2 * Tk2 - pow(SkTk,2);
A = Tk2 / det * delSk - SkTk / det * delTk;
B = - SkTk / det * delSk + Sk2 / det * delTk;
peak = -A/B;
t = clock() - t;
double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
printf("Total elapsed time = %f\n", time_taken);
printf("%f\n",peak);
sigma = sqrt(-1/B);
printf("%f\n",sigma);
return 0;
}
We will get:
Total elapsed time = 0.000024
6.106384
8.332942