00001 import math
00002
def Mean(xs):
    """
    Return the arithmetic mean of the dataset ``xs`` as a float.

    ``xs`` must support ``len()`` and ``sum()``.

    Raises RuntimeError if ``xs`` is empty.
    """
    count = len(xs)
    if count == 0:
        raise RuntimeError("Can't calculate mean of empty sequence")
    # Convert the total to float first so integer inputs never trigger
    # integer division.
    total = float(sum(xs))
    return total / count
00010
def Median(xs):
    """
    Return the median of the dataset ``xs``.

    The data is sorted into a new list; ``xs`` itself is not modified.
    For an odd number of elements the middle element is returned as-is.
    For an even number of elements the two middle elements are averaged
    and the result is a float.

    Raises RuntimeError if ``xs`` is empty.
    """
    count = len(xs)
    if count == 0:
        raise RuntimeError("Can't calculate median of empty sequence")
    ordered = sorted(xs)
    # Floor division keeps the index an int on both Python 2 and Python 3;
    # true division would produce a float, which cannot index a list.
    lower_mid = (count - 1) // 2
    if count % 2 == 0:
        return (ordered[lower_mid] + ordered[lower_mid + 1]) / 2.0
    return ordered[lower_mid]
00022
def StdDev(xs):
    """
    Return the population standard deviation of the dataset ``xs``:

                   | sum[(xi-<x>)^2] |
        sigma = sqrt|-----------------|
                   |        n        |

    where <x> is the mean of ``xs`` and n is the number of elements.
    The sum of squares is divided by n, not by n-1.

    Raises RuntimeError (from Mean) if ``xs`` is empty.
    """
    centre = Mean(xs)
    squared_deviations = [(value - centre) ** 2 for value in xs]
    variance = sum(squared_deviations) / len(xs)
    return math.sqrt(variance)
00033
def Min(xs):
    """
    Return the smallest element of ``xs``, as determined by the builtin
    ``min``.
    """
    smallest = min(xs)
    return smallest
00036
def Max(xs):
    """
    Return the largest element of ``xs``, as determined by the builtin
    ``max``.
    """
    largest = max(xs)
    return largest
00039
def Correl(xs, ys):
    """
    Return the correlation coefficient between ``xs`` and ``ys``:

                     sum[(xi-<x>)*(yi-<y>)]
        r = ---------------------------------------------
            sqrt(sum[(xi-<x>)^2]) * sqrt(sum[(yi-<y>)^2])

    where <x> and <y> are the means of ``xs`` and ``ys``. The two factors
    in the denominator are root sums of squared deviations (they are not
    divided by n), so the normalisation cancels against the numerator.

    Raises RuntimeError if the sequences differ in length, if they have
    length 1, or (from Mean) if they are empty.

    NOTE(review): when either sequence has zero spread the denominator is
    zero; with plain Python floats that surfaces as ZeroDivisionError.
    """
    if len(xs) != len(ys):
        raise RuntimeError("Can't calculate correl. Sequence lengths do not match.")
    if len(xs) == 1:
        raise RuntimeError("Can't calculate correl of sequences with length 1.")

    centre_x = Mean(xs)
    centre_y = Mean(ys)

    # Accumulate the cross term and both sums of squares in a single pass.
    cross_sum = 0.0
    squares_x = 0.0
    squares_y = 0.0
    for x, y in zip(xs, ys):
        dx = x - centre_x
        dy = y - centre_y
        cross_sum += dx * dy
        squares_x += dx ** 2
        squares_y += dy ** 2

    denominator = math.sqrt(squares_x) * math.sqrt(squares_y)
    return cross_sum / denominator
00066
def Histogram(xs, bounds, num_bins):
    """
    Count the values of ``xs`` into ``num_bins`` equal-width bins.

    ``bounds`` is indexed as ``bounds[0]`` (lower edge) and ``bounds[1]``
    (upper edge). Bin ``i`` covers the half-open interval

        [bounds[0] + i*w, bounds[0] + (i+1)*w)   with w = (bounds[1]-bounds[0])/num_bins

    Values below ``bounds[0]`` or at/above ``bounds[1]`` are ignored.

    Returns a list of ``num_bins`` integer counts.

    NOTE(review): ``bounds[0] == bounds[1]`` makes the scale factor divide
    by zero; with plain Python numbers that raises ZeroDivisionError.
    """
    counts = [0] * num_bins
    lower = bounds[0]
    bins_per_unit = 1.0 * num_bins / (bounds[1] - lower)
    for x in xs:
        # Use floor, not int(): int() truncates toward zero, which would
        # map values just below the lower bound (position in (-1, 0)) to
        # bin 0 instead of rejecting them.
        index = int(math.floor((x - lower) * bins_per_unit))
        if 0 <= index < num_bins:
            counts[index] += 1
    return counts