#!/usr/bin/env python
'''
Implement a set of statistics registers in the style of a pocket calculator.
The available routines are::
def Clear(): clear the stats registers
def Show(): print the contents of the stats registers
def Add(x, y): add an X,Y pair
def Subtract(x, y): remove an X,Y pair
def AddWeighted(x, y, z): add an X,Y pair with weight Z
def SubtractWeighted(x, y, z): remove an X,Y pair with weight Z
def Mean(): arithmetic mean of X & Y
def StdDev(): standard deviation on X & Y
def StdErr(): standard error on X & Y
def LinearRegression(): linear regression
def LinearRegressionVariance(): est. errors of slope & intercept
def LinearRegressionCorrelation(): the regression coefficient
def CorrelationCoefficient(): relation of errors in slope & intercept
:see: http://stattrek.com/AP-Statistics-1/Regression.aspx?Tutorial=Stat
pocket calculator Statistical Registers, Pete Jemian, 2003-Apr-18
Source code documentation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
'''
import math
version = '0.1a'
[docs]class StatsRegClass:
'pocket calculator Statistical Registers class'
def __init__(self):
'set up the statistics registers'
self.Clear()
def _ClearResults_(self):
'''
Cache the results to avoid unnecessary recalculation.
When requested, test for None before recalculating.
'''
self.slope = None
self.intercept = None
self.determ = None
self.mean = None
self.sDev = None
self.sErr = None
self.lrVariance = None
self.r = None
self.correlation = None
[docs] def Clear(self):
'''
clear the statistics registers:
:math:`N=w=\sum{x}=\sum{x^2}=\sum{y}=\sum{y^2}=\sum{xy}=0`
'''
self.count = 0
self.weight = 0
self.sumX = 0
self.sumXX = 0
self.sumY = 0
self.sumYY = 0
self.sumXY = 0
self._ClearResults_()
[docs] def Show(self):
'contents of the statistics registers'
print(self.Show.__doc__)
print("\t%s=%d" % ('NumPts', self.count))
print("\t%s=%g\t%s=%g" % ('SumX ', self.sumX, 'SumXX', self.sumXX))
print("\t%s=%g\t%s=%g" % ('SumY ', self.sumY, 'SumYY', self.sumYY))
print("\t%s=%g\t%s=%g" % ('weight', self.weight, 'SumXY', self.sumXY))
[docs] def Add(self, x, y):
'''
add an X,Y pair to the statistics registers
:param float x: value to accumulate
:param float y: value to accumulate
'''
return self.AddWeighted(x,y, 1)
[docs] def Subtract(self, x, y):
'''
remove an X,Y pair from the statistics registers
:param float x: value to remove
:param float y: value to remove
'''
return self.SubtractWeighted(x,y, 1)
[docs] def AddWeighted(self, x, y, z):
'''
add a weighted X,Y+/Z trio to the statistics registers
:param float x: value to accumulate
:param float y: value to accumulate
:param float z: variance (weight = ``1/z^2``) of y
'''
self._ClearResults_()
weight = 1.0/(z**2)
xWt = x*weight
yWt = y*weight
self.count += 1
self.weight += weight
self.sumX += xWt
self.sumXX += xWt**2
self.sumY += yWt
self.sumYY += yWt**2
self.sumXY += xWt*yWt
return self.count
[docs] def SubtractWeighted(self, x, y, z):
'''
remove a weighted X,Y+/-Z trio from the statistics registers
:param float x: value to remove
:param float y: value to remove
:param float z: variance (weight = ``1/z^2``) of y
'''
self._ClearResults_()
weight = 1.0/(z**2)
xWt = x*weight
yWt = y*weight
self.count -= 1
self.weight -= weight
self.sumX -= xWt
self.sumXX -= xWt**2
self.sumY -= yWt
self.sumYY -= yWt**2
self.sumXY -= xWt*yWt
return self.count
[docs] def Mean(self):
'''
arithmetic mean of X & Y
.. math::
(1 / N) \\sum_i^N x_i
:return: mean X and Y values
:rtype: float
'''
if self.mean == None:
self.mean = (self.sumX / self.weight, self.sumY / self.weight)
return self.mean
def __sdeverr(self, summation, sqr, weight):
'''
internal routine standard deviation and
standard error of from given data
'''
temp = sqr - (summation**2)/weight
if temp > 0:
dev = math.sqrt(temp / weight) # standard deviation
err = math.sqrt(temp / (weight-1)) # standard error
else:
dev = 0
err = 0
return (dev, err)
[docs] def StdDev(self):
'''
standard deviation on X & Y
:return: standard deviation of mean X and Y values
:rtype: (float, float)
'''
if self.sDev == None:
xDev = self.__sdeverr(self.sumX, self.sumXX, self.weight)[0]
yDev = self.__sdeverr(self.sumY, self.sumYY, self.weight)[0]
self.sDev = (xDev, yDev)
return self.sDev
[docs] def StdErr(self):
'''
standard error on X & Y
:return: standard error of mean X and Y values
:rtype: (float, float)
'''
if self.sErr == None:
xErr = self.__sdeverr(self.sumX, self.sumXX, self.weight)[1]
yErr = self.__sdeverr(self.sumY, self.sumYY, self.weight)[1]
self.sErr = (xErr, yErr)
return self.sErr
[docs] def LinearEval(self, x):
'''
Evaluate a linear fit at the given value: :math:`y = a + b x`
:param x: independent value, `x`
:type x: float
:return: y
:rtype: float
'''
self.LinearRegression()
return self.constant + x * self.slope
[docs] def determinant(self):
''' compute and return the determinant of the square matrices::
| sum_w sum_x | | sum_w sum_y |
| sum_x sum_(x^2) | | sum_y sum_(y^2) |
:return: determinants of x and y summation matrices
:rtype: (float, float)
'''
if self.determ == None:
x = self.weight*self.sumXX - self.sumX**2
y = self.weight*self.sumYY - self.sumY**2
self.determ = (x, y)
return self.determ
[docs] def LinearRegression(self):
'''
For (*x,y*) data pairs added to the registers,
fit and find (*a,b*) that satisfy:
.. math::
y = a + b x
:return: (a, b) for fit of y=a+b*x
:rtype: (float, float)
'''
if self.slope == None or self.intercept == None:
determ = self.determinant()[0]
self.slope = (self.weight*self.sumXY - self.sumX*self.sumY) / determ
self.intercept = (self.sumXX*self.sumY - self.sumX*self.sumXY) / determ
return (self.intercept, self.slope)
[docs] def LinearRegressionVariance(self):
'''
est. errors of slope & intercept
:return: (var_a, var_b) -- is this correct?
:rtype: (float, float)
'''
if self.lrVariance == None:
determ = self.determinant()[0]
slope = math.sqrt (self.weight / determ)
constant = math.sqrt (self.sumXX / determ)
self.lrVariance = (constant, slope)
return self.lrVariance
[docs] def LinearRegressionCorrelation(self):
'''
the regression coefficient
:return: (corr_a, corr_b) -- is this correct?
:rtype: (float, float)
:see: http://stattrek.com/AP-Statistics-1/Correlation.aspx?Tutorial=Stat
Look at "Product-moment correlation coefficient"
'''
if self.r == None:
VarX, VarY = self.determinant()
self.r = (self.weight*self.sumXX - self.sumX*self.sumY) / math.sqrt(VarX*VarY)
return self.r
[docs] def CorrelationCoefficient(self):
'''
relation of errors in slope and intercept
:return: correlation coefficient
:rtype: float
'''
if self.correlation == None:
self.correlation = -self.sumX / math.sqrt (self.weight * self.sumXX)
return self.correlation
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def __selftest():
'internal test StatsReg functions'
print("%s: %s" % ('documentation', __selftest.__doc__))
print("---------------------------------------")
print("create a set of stats registers")
reg = StatsRegClass()
reg.Show()
print("---------------------------------------")
print("add data to the stats registers")
reg.Add(1, 1)
reg.Add(-2, 2.01)
reg.Add(1, 2)
reg.Show()
(xBar, yBar) = reg.Mean()
(xBarDev, yBarDev) = reg.StdDev()
print("\t%s: %g +/- %g" % ('<x>', xBar, xBarDev))
print("\t%s: %g +/- %g" % ('<y>', yBar, yBarDev))
print("---------------------------------------")
print("subtract data from the stats registers")
reg.Subtract(1, 1)
reg.Show()
(xBar, yBar) = reg.Mean()
(xBarDev, yBarDev) = reg.StdDev()
print("\t%s: %g +/- %g" % ('<x>', xBar, xBarDev))
print("\t%s: %g +/- %g" % ('<y>', yBar, yBarDev))
print("---------------------------------------")
print("clear the stats registers")
reg.Clear()
reg.Show()
print("---------------------------------------")
print("linear regression test:")
reg.Add(518, 1.101)
reg.Add(519, 0.869)
reg.Add(520, 0.674)
reg.Add(521, 0.376)
reg.Add(522, 0.143)
reg.Show()
(xBar, yBar) = reg.Mean()
(xBarDev, yBarDev) = reg.StdDev()
(constant, slope) = reg.LinearRegression()
(constantVar, slopeVar) = reg.LinearRegressionVariance()
print("\t%s: %g +/- %g" % ('constant', constant, constantVar))
print("\t%s: %g +/- %g" % ('slope', slope, slopeVar))
print("\tLinearRegressionCorrelation = %g" % reg.LinearRegressionCorrelation())
print("\tCorrelationCoefficient = %g" % reg.CorrelationCoefficient())
if __name__ == '__main__':
print("%s: v%s, %s" % ('documentation', version, __doc__))
__selftest()