[Curve Fitting & peak search] Fast fitting a Guassian function to data points using C & python

In this article, A high speed Guassian fitting method suggested by Guo’s will be introduced. In my case, it is around 200x faster compared to the python’s lmfit library and has a similar speed compared to previously demonstrated Jacquelin’s method.

The model used in Guo’s article is:

Its peak can be calculated using these formulas:

If you are interested, please have a look of the original article. The data set below will be used:

Same as my previous article, the python code using lmit library fitting a Gaussian model to the data set:

import numpy as np
from lmfit.models import GaussianModel
import matplotlib.pyplot as plt
import time

ydata = np.array([2.727050781,2.749023438,2.951660156,3.276367188,3.286132813,3.337402344,3.286132813,3.115234375,3.032226563,2.961425781,2.785644531])
xdata = np.array(range(len(ydata)))
xdata = xdata+1 #x starts from position 1

gmodel = GaussianModel()

start_time = time.time()

params = gmodel.make_params(amplitude=ydata.max(),
                            center=xdata.mean(),
                            sigma=xdata.std())
result = gmodel.fit(ydata, params, x=xdata)

end_time = time.time() - start_time
print("elapsed time: ",end_time)

print(result.fit_report())
plt.scatter(xdata,ydata,label='data points')
plt.plot(xdata, result.best_fit, 'r-')
plt.show()

We will get

elapsed time:  0.0069561004638671875
center:     6.20838810 +/- 0.17609727 (2.84%) (init = 6)
sigma:      7.97128486 +/- 0.49022793 (6.15%) (init = 3.162278)

The python code fitting a Gaussian model to the data set using Guo’s method [Repeating it for 100 times]:

import time
import math
import numpy as np

ydata = np.array([2.727050781,2.749023438,2.951660156,3.276367188,3.286132813,3.337402344,3.286132813,3.115234375,3.032226563,2.961425781,2.785644531])
xdata = np.array(range(len(ydata)))
xdata = xdata+1 #x starts from position 1

x = [0]*9
y = [0]*3
x_1 = 0 #x
x_2 = 0 #x^2
x_3 = 0 #x^3
x_4 = 0 #x^4
ln_y = 0
xln_y= 0
x_2ln_y = 0
start_time3 = time.time()
   
i = 0
while i < 100: 
    x = [0]*9;
    y = [0]*3;
    x_1 = 0
    x_2 = 0
    x_3 = 0
    x_4 = 0
    ln_y = 0
    xln_y= 0
    x_2ln_y = 0    
    
    j = 1
    while (j <= 12 - 1):
        temp_data = j
        x_1 += temp_data
        x_2 += pow(temp_data,2)
        x_3 += pow(temp_data,3)
        x_4 += pow(temp_data,4)
        temp_ln_y = math.log(ydata[j-1])
        ln_y += temp_ln_y
        xln_y += temp_data*temp_ln_y
        x_2ln_y += pow(temp_data,2)*temp_ln_y
        j+=1
    
    x[0] = len(ydata)
    x[1] = x_1
    x[2] = x_2
    x[3] = x_1
    x[4] = x_2
    x[5] = x_3
    x[6] = x_2
    x[7] = x_3
    x[8] = x_4
    y[0] = ln_y
    y[1] = xln_y
    y[2] = x_2ln_y
    
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    #%%Gaussian elimiation
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    common = x[3]/x[0]
    x[4] -= x[1] * common
    x[5] -= x[2] * common
    y[1] -= y[0] * common
    
    common2 = x[6]/x[0]
    x[7] -= x[1] * common2
    x[8] -= x[2] * common2
    y[2] -= y[0] * common2
    
    common3 = x[7]/x[4]
    x[8] -= x[5] * common3
    y[2] -= y[1] * common3
    
    c = y[2]/x[8]
    b = (y[1] - c * x[5])/x[4]

    peak = -0.5*b/c
    sigma = math.sqrt(-0.5/c)
    i+=1

end_time3 = time.time() - start_time3
print("elapsed time: ",end_time3)    
print("peak = ",peak)
print("sigma = ",sigma)

We will get

elapsed time:  0.0029916763305664062
peak =  6.226874114976617
sigma =  8.06247957982415

which is similar to the lmfit library.

By the way, I have mentioned about how to solve a similar linear system using python and c before, you may have a look here if you are interested.

The C program fitting a Gaussian model to the data set using Guo’s method:

#include <stdio.h>
#include <math.h>
#include <time.h>
#define data_size 12
double data[data_size] = {2.72705078125000,2.74902343750000,2.95166015625000,3.27636718750000,3.28613281250000,3.33740234375000,3.28613281250000,3.11523437500000,3.03222656250000,2.96142578125000,2.78564453125000};
double x[9] = {0};
double y[3] = {0};
double x_1 = 0;
double x_2 = 0;
double x_3 = 0;
double x_4 = 0;
double ln_y = 0;
double xln_y= 0;
double x_2ln_y = 0;

double temp_data = 0; //store x location
double temp_ln_y = 0; 

double common = 0;
double common2 = 0;
double common3 = 0;
double b = 0;
double c = 0;
double peak = 0;
double sigma = 0;
int main()
{
        clock_t t;
        t = clock();
   
		for(int j = 1; j <= data_size - 1; j++)
		{   
			temp_data = j;
            x_1 += temp_data;
            x_2 += pow(temp_data,2);
            x_3 += pow(temp_data,3);
            x_4 += pow(temp_data,4);
            temp_ln_y = log(data[j-1]);
			ln_y += temp_ln_y;
			xln_y += temp_data*temp_ln_y;
			x_2ln_y += pow(temp_data,2)*temp_ln_y;
			
		}

		x[0] = data_size-1;
		x[1] = x_1;
		x[2] = x_2;
		x[3] = x_1;
		x[4] = x_2;
		x[5] = x_3;
		x[6] = x_2;
		x[7] = x_3;
		x[8] = x_4;
		y[0] = ln_y;
		y[1] = xln_y;
		y[2] = x_2ln_y;

    	//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    	//%%	Gaussian elimiation
    	//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    	common = x[3]/x[0];
    	x[4] -= x[1] * common;
    	x[5] -= x[2] * common;
    	y[1] -= y[0] * common;
    
    	common2 = x[6]/x[0];
    	x[7] -= x[1] * common2;
    	x[8] -= x[2] * common2;
    	y[2] -= y[0] * common2;
    
    	common3 = x[7]/x[4];
    	x[8] -= x[5] * common3;
    	y[2] -= y[1] * common3;

	    c = y[2]/x[8];
	    b = (y[1] - c * x[5])/x[4];

        t = clock() - t;
        double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
        printf("Total elapsed time = %f", time_taken);
        
        peak = -0.5*b/c;
        printf("\n%.9f\n",peak);
		sigma = sqrt(-0.5/c);
		printf("%f\n",sigma);
		

    return 0;
}

We will get

Total elapsed time = 0.000021
6.226874116
8.062480

Leave a Reply Cancel reply