Commit ec6521b9 authored by flothoni's avatar flothoni

tools/MRDscript; refactor linear regression function

parent af6645ea
......@@ -41,6 +41,7 @@ import json
import os
import math
import getopt
from collections import defaultdict
import argparse
import os.path
############################################################
......@@ -69,65 +70,50 @@ vidjilToUserFamily = {
### routines
############################################################
### function for curve fitting y = ax
### just slope, no intercept, to force (0,0)
def linearRegression(y, x):
### function for curve fitting
def regression(y, x, mode="linear"):
"""
Function for curve fitting, with a given mode to select.
If mode 'linear': curve fitting y = ax; just slope, no intercept, to force (0,0)
If mode 'linearAxPB': curve fitting y = ax+b; to agree with Gui's results
"""
lr = {}
n = len(y) # should be same as len(x)
lr['n'] = n
sum_x = 0.0
sum_y = 0.0
sum_xy = 0.0
sum_xx = 0.0
sum_yy = 0.0
sums = defaultdict(lambda: 0.0)
sums["x"] = 0.0
sums["y"] = 0.0
sums["xy"] = 0.0
sums["xx"] = 0.0
sums["yy"] = 0.0
for i in range(n):
sum_x += x[i]
sum_y += y[i]
sum_xy += (x[i]*y[i])
sum_xx += (x[i]*x[i])
sum_yy += (y[i]*y[i])
sums["x"] += x[i]
sums["y"] += y[i]
sums["xy"] += (x[i]*y[i])
sums["xx"] += (x[i]*x[i])
sums["yy"] += (y[i]*y[i])
if mode == "linear":
lr['slope'] = sums["xy"] / sums["xx"]
denom = (n*sums["xx"]-sums["x"]*sums["x"])*(n*sums["yy"]-sums["y"]*sums["y"])
lr['r2'] = math.pow((n*sums["xy"] - sums["x"]*sums["y"]),2)/denom if denom > 0 else 0.0
lr['s'] = math.sqrt((sums["yy"] - sums["xy"]*sums["xy"]/sums["xx"])/n)
elif mode == "linearAxPB":
## http://mathworld.wolfram.com/LeastSquaresFitting.html
nss_xx = n*sums["xx"] - sums["x"]*sums["x"]
nss_yy = n*sums["yy"] - sums["y"]*sums["y"]
lr['slope'] = (n*sums["xy"] - sums["x"]*sums["y"]) / nss_xx
lr['intcp'] = (sums["y"]*sums["xx"] - sums["x"]*sums["xy"]) / nss_xx
nss_xy = n*sums["xy"] - sums["x"]*sums["y"]
denom = nss_xx*nss_yy
lr['r2'] = nss_xy*nss_xy/denom if denom > 0 else 0.0
lr['s'] = math.sqrt((sums["yy"] - sums["xy"]*sums["xy"]/sums["xx"])/(n-1))
lr['slope'] = sum_xy / sum_xx
denom = (n*sum_xx-sum_x*sum_x)*(n*sum_yy-sum_y*sum_y)
lr['r2'] = math.pow((n*sum_xy - sum_x*sum_y),2)/denom if denom > 0 else 0.0
lr['s'] = math.sqrt((sum_yy - sum_xy*sum_xy/sum_xx)/n)
return lr
############################################################
### function for curve fitting y = ax+b
### to agree with Gui's results
def linearRegressionAxPB(y, x):
lr = {}
n = len(y) # should be same as len(x)
lr['n'] = n
sum_x = 0.0
sum_y = 0.0
sum_xy = 0.0
sum_xx = 0.0
sum_yy = 0.0
for i in range(n):
sum_x += x[i]
sum_y += y[i]
sum_xy += (x[i]*y[i])
sum_xx += (x[i]*x[i])
sum_yy += (y[i]*y[i])
## http://mathworld.wolfram.com/LeastSquaresFitting.html
nss_xx = n*sum_xx - sum_x*sum_x
nss_yy = n*sum_yy - sum_y*sum_y
lr['slope'] = (n*sum_xy - sum_x*sum_y) / nss_xx
lr['intcp'] = (sum_y*sum_xx - sum_x*sum_xy) / nss_xx
nss_xy = n*sum_xy - sum_x*sum_y
denom = nss_xx*nss_yy
lr['r2'] = nss_xy*nss_xy/denom if denom > 0 else 0.0
lr['s'] = math.sqrt((sum_yy - sum_xy*sum_xy/sum_xx)/(n-1))
return lr
############################################################
### function to get family name from clone name
......@@ -350,7 +336,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
## fit curve
lr = linearRegression(y, x)
lr = regression(y, x, "linear")
coeff[jfam] = {}
coeff[jfam]['f'] = lr['slope']
coeff[jfam]['r2'] = lr['r2']
......@@ -363,7 +349,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
## fit curve Ax+B
lrB = linearRegressionAxPB(y, x)
lrB = regression(y, x, "linearAxPB")
coeff[jfam]['fB'] = lrB['slope']
coeff[jfam]['r2B'] = lrB['r2']
coeff[jfam]['s'] = lrB['s']
......@@ -391,7 +377,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
### fit curve
lr = linearRegression(y, x)
lr = regression(y, x, "linear")
coeff[key]['f'] = lr['slope']
coeff[key]['r2'] = lr['r2']
coeff[key]['s'] = lr['s']
......@@ -422,7 +408,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
### fit curve
lr = linearRegression(y, x)
lr = regression(y, x, "linear")
coeff[key]['f'] = lr['slope']
coeff[key]['r2'] = lr['r2']
coeff[key]['s'] = lr['s']
......@@ -454,7 +440,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
## fit curve
lr = linearRegression(y, x)
lr = regression(y, x, "linear")
coeff['UNI'] = {}
coeff['UNI']['f'] = lr['slope']
coeff['UNI']['r2'] = lr['r2']
......@@ -467,7 +453,7 @@ def computeCoefficients(spikes):
print('** Please check input files **')
else:
## fit curve Ax+B
lrB = linearRegressionAxPB(y, x)
lrB = regression(y, x, "linearAxPB")
coeff['UNI']['fB'] = lrB['slope']
coeff['UNI']['r2B'] = lrB['r2']
coeff['UNI']['sB'] = lrB['s']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment