import math


def Mean(xs):
    """
    Calculate the arithmetic mean of a dataset.

    xs must be a sized sequence of numbers. The result is always a float.
    Raises RuntimeError if xs is empty.
    """
    if len(xs) == 0:
        raise RuntimeError("Can't calculate mean of empty sequence")
    return float(sum(xs)) / len(xs)


def Median(xs):
    """
    Calculate the median of a dataset.

    For an odd number of values the middle element of the sorted data is
    returned unchanged; for an even number the two middle elements are
    averaged and a float is returned.
    Raises RuntimeError if xs is empty.
    """
    if len(xs) == 0:
        raise RuntimeError("Can't calculate median of empty sequence")
    sorted_xs = sorted(xs)
    # Floor division keeps the index an integer on Python 2 and Python 3;
    # plain '/' yields a float on Python 3, which is not a valid list index.
    mid = len(sorted_xs) // 2
    if len(sorted_xs) % 2 == 0:
        return (sorted_xs[mid - 1] + sorted_xs[mid]) / 2.0
    return sorted_xs[mid]


def StdDev(xs):
    """
    Calculate the (population) standard deviation of a dataset

              | sum[(xi-<x>)^2] |
    sigma=sqrt|-----------------|
              |        n        |

    where <x> is the mean of xs and n the number of values.
    Raises RuntimeError (from Mean) if xs is empty.
    """
    mean = Mean(xs)
    return math.sqrt(sum((x - mean) ** 2 for x in xs) / len(xs))


def Min(xs):
    """
    Return the smallest value of the dataset (thin wrapper around min()).
    """
    return min(xs)


def Max(xs):
    """
    Return the largest value of the dataset (thin wrapper around max()).
    """
    return max(xs)


def Correl(xs, ys):
    """
    Calculate the correlation coefficient between xs and ys as

            sum[(xi-<x>)*(yi-<y>)]
    r=-----------------------------------------
      sqrt(sum[(xi-<x>)^2])*sqrt(sum[(yi-<y>)^2])

    where <x> and <y> are the means of xs and ys. The denominator uses the
    root of the summed squared deviations (no 1/n factor), so the factors of
    n cancel against the un-normalised numerator.

    Raises RuntimeError if the sequences differ in length, have length 1,
    or are empty (the latter is raised by Mean).
    Raises ZeroDivisionError if either sequence has zero variance, i.e. all
    of its values are identical.
    """
    if len(xs) != len(ys):
        raise RuntimeError(
            "Can't calculate correl. Sequence lengths do not match.")
    if len(xs) == 1:
        raise RuntimeError(
            "Can't calculate correl of sequences with length 1.")
    mean_x = Mean(xs)
    mean_y = Mean(ys)
    cross_term = 0.0
    sq_dev_x = 0.0
    sq_dev_y = 0.0
    # Single pass accumulating the covariance term and both squared-deviation
    # sums.
    for x, y in zip(xs, ys):
        dx = x - mean_x
        dy = y - mean_y
        cross_term += dx * dy
        sq_dev_x += dx ** 2
        sq_dev_y += dy ** 2
    return cross_term / (math.sqrt(sq_dev_x) * math.sqrt(sq_dev_y))


def Histogram(xs, bounds, num_bins):
    """
    Count the values of xs in num_bins equally wide bins.

    bounds is a (lower, upper) pair; the bins partition the half-open
    interval [lower, upper). Values outside that interval, including values
    equal to the upper bound, are ignored.

    Returns a list of num_bins integer counts.
    Raises ZeroDivisionError if both bounds are equal.
    """
    lower = bounds[0]
    bins = [0] * num_bins
    # Number of bins per unit of x; multiplying an offset by this gives the
    # (fractional) bin position.
    d = 1.0 * num_bins / (bounds[1] - lower)
    for x in xs:
        # floor() rather than int(): int() truncates toward zero, which would
        # put values slightly below the lower bound into bin 0.
        index = int(math.floor((x - lower) * d))
        if index < 0 or index > num_bins - 1:
            continue
        bins[index] += 1
    return bins