Commit ec6521b9 by flothoni

### tools/MRDscript; refactor linear regression function

parent af6645ea
 ... ... @@ -41,6 +41,7 @@ import json import os import math import getopt from collections import defaultdict import argparse import os.path ############################################################ ... ... @@ -69,65 +70,50 @@ vidjilToUserFamily = { ### routines ############################################################ ### function for curve fitting y = ax ### just slope, no intercept, to force (0,0) def linearRegression(y, x): ### function for curve fitting def regression(y, x, mode="linear"): """ Function for curve fitting, with a given mode to select. If mode 'linear': curve fitting y = ax; just slope, no intercept, to force (0,0) If mode 'linearAxPB': curve fitting y = ax+b; to agree with Gui's results """ lr = {} n = len(y) # should be same as len(x) lr['n'] = n sum_x = 0.0 sum_y = 0.0 sum_xy = 0.0 sum_xx = 0.0 sum_yy = 0.0 sums = defaultdict(lambda: 0.0) sums["x"] = 0.0 sums["y"] = 0.0 sums["xy"] = 0.0 sums["xx"] = 0.0 sums["yy"] = 0.0 for i in range(n): sum_x += x[i] sum_y += y[i] sum_xy += (x[i]*y[i]) sum_xx += (x[i]*x[i]) sum_yy += (y[i]*y[i]) sums["x"] += x[i] sums["y"] += y[i] sums["xy"] += (x[i]*y[i]) sums["xx"] += (x[i]*x[i]) sums["yy"] += (y[i]*y[i]) if mode == "linear": lr['slope'] = sums["xy"] / sums["xx"] denom = (n*sums["xx"]-sums["x"]*sums["x"])*(n*sums["yy"]-sums["y"]*sums["y"]) lr['r2'] = math.pow((n*sums["xy"] - sums["x"]*sums["y"]),2)/denom if denom > 0 else 0.0 lr['s'] = math.sqrt((sums["yy"] - sums["xy"]*sums["xy"]/sums["xx"])/n) elif mode == "linearAxPB": ## http://mathworld.wolfram.com/LeastSquaresFitting.html nss_xx = n*sums["xx"] - sums["x"]*sums["x"] nss_yy = n*sums["yy"] - sums["y"]*sums["y"] lr['slope'] = (n*sums["xy"] - sums["x"]*sums["y"]) / nss_xx lr['intcp'] = (sums["y"]*sums["xx"] - sums["x"]*sums["xy"]) / nss_xx nss_xy = n*sums["xy"] - sums["x"]*sums["y"] denom = nss_xx*nss_yy lr['r2'] = nss_xy*nss_xy/denom if denom > 0 else 0.0 lr['s'] = math.sqrt((sums["yy"] - sums["xy"]*sums["xy"]/sums["xx"])/(n-1)) lr['slope'] = sum_xy / sum_xx denom = (n*sum_xx-sum_x*sum_x)*(n*sum_yy-sum_y*sum_y) lr['r2'] = math.pow((n*sum_xy - sum_x*sum_y),2)/denom if denom > 0 else 0.0 lr['s'] = math.sqrt((sum_yy - sum_xy*sum_xy/sum_xx)/n) return lr ############################################################ ### function for curve fitting y = ax+b ### to agree with Gui's results def linearRegressionAxPB(y, x): lr = {} n = len(y) # should be same as len(x) lr['n'] = n sum_x = 0.0 sum_y = 0.0 sum_xy = 0.0 sum_xx = 0.0 sum_yy = 0.0 for i in range(n): sum_x += x[i] sum_y += y[i] sum_xy += (x[i]*y[i]) sum_xx += (x[i]*x[i]) sum_yy += (y[i]*y[i]) ## http://mathworld.wolfram.com/LeastSquaresFitting.html nss_xx = n*sum_xx - sum_x*sum_x nss_yy = n*sum_yy - sum_y*sum_y lr['slope'] = (n*sum_xy - sum_x*sum_y) / nss_xx lr['intcp'] = (sum_y*sum_xx - sum_x*sum_xy) / nss_xx nss_xy = n*sum_xy - sum_x*sum_y denom = nss_xx*nss_yy lr['r2'] = nss_xy*nss_xy/denom if denom > 0 else 0.0 lr['s'] = math.sqrt((sum_yy - sum_xy*sum_xy/sum_xx)/(n-1)) return lr ############################################################ ### function to get family name from clone name ... ... @@ -350,7 +336,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ## fit curve lr = linearRegression(y, x) lr = regression(y, x, "linear") coeff[jfam] = {} coeff[jfam]['f'] = lr['slope'] coeff[jfam]['r2'] = lr['r2'] ... ... @@ -363,7 +349,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ## fit curve Ax+B lrB = linearRegressionAxPB(y, x) lrB = regression(y, x, "linearAxPB") coeff[jfam]['fB'] = lrB['slope'] coeff[jfam]['r2B'] = lrB['r2'] coeff[jfam]['s'] = lrB['s'] ... ... @@ -391,7 +377,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ### fit curve lr = linearRegression(y, x) lr = regression(y, x, "linear") coeff[key]['f'] = lr['slope'] coeff[key]['r2'] = lr['r2'] coeff[key]['s'] = lr['s'] ... ... @@ -422,7 +408,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ### fit curve lr = linearRegression(y, x) lr = regression(y, x, "linear") coeff[key]['f'] = lr['slope'] coeff[key]['r2'] = lr['r2'] coeff[key]['s'] = lr['s'] ... ... @@ -454,7 +440,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ## fit curve lr = linearRegression(y, x) lr = regression(y, x, "linear") coeff['UNI'] = {} coeff['UNI']['f'] = lr['slope'] coeff['UNI']['r2'] = lr['r2'] ... ... @@ -467,7 +453,7 @@ def computeCoefficients(spikes): print('** Please check input files **') else: ## fit curve Ax+B lrB = linearRegressionAxPB(y, x) lrB = regression(y, x, "linearAxPB") coeff['UNI']['fB'] = lrB['slope'] coeff['UNI']['r2B'] = lrB['r2'] coeff['UNI']['sB'] = lrB['s'] ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!