OpenStructure
stutil.py
Go to the documentation of this file.
1 import math
2 from ost import mol
3
def FloatValueExtract(func):
  """
  Decorator to wrap functions that take a list of float values. In addition to
  passing in a list of float values directly, it is possible to extract the
  values from attributes or generic properties.

  The decorated function is called as ``f(xs, prop=None, attr=None)``:

  :param xs: the values themselves, or the objects to extract values from.
  :param prop: if given, each value is obtained with
               ``mol.EntityPropertyMapper(prop, level).Get(x)``. The level is
               chosen from the type of the first element of xs (atom, residue
               or chain; unspecified otherwise). Must not be combined with
               attr.
  :param attr: if given, each value is obtained with ``getattr(x, attr)``.

  Elements for which the extraction raises are skipped silently, so the wrapped
  function may see fewer values than there are elements in xs.
  """
  # Imported locally so the decorator does not depend on a module-level import.
  import functools

  def _collect(items, getter):
    # Best-effort extraction: items whose value cannot be obtained are left
    # out. Only Exception is caught so that KeyboardInterrupt and SystemExit
    # still propagate.
    vals=[]
    for x in items:
      try:
        vals.append(getter(x))
      except Exception:
        pass
    return vals

  # wraps() keeps the name and docstring of the wrapped function visible.
  @functools.wraps(func)
  def _dec(xs, prop=None, attr=None):
    if prop is not None:
      if len(xs)==0:
        return func([])
      assert attr is None, "prop and attr are mutually exclusive"
      # The property level is deduced from the first element only.
      level=mol.Prop.Level.UNSPECIFIED
      if isinstance(xs[0], mol.AtomBase):
        level=mol.Prop.Level.ATOM
      elif isinstance(xs[0], mol.ResidueBase):
        level=mol.Prop.Level.RESIDUE
      elif isinstance(xs[0], mol.ChainBase):
        level=mol.Prop.Level.CHAIN
      epm=mol.EntityPropertyMapper(prop, level)
      return func(_collect(xs, epm.Get))
    if attr is not None:
      return func(_collect(xs, lambda x: getattr(x, attr)))
    return func(xs)
  return _dec
40
@FloatValueExtract
def Mean(xs):
  """
  Return the arithmetic mean of the values in xs as a float.

  Raises RuntimeError if xs is empty.
  """
  count=len(xs)
  if count==0:
    raise RuntimeError("Can't calculate mean of empty sequence")
  total=sum(xs)
  return float(total)/count
49
@FloatValueExtract
def Median(xs):
  """
  Calculate median of dataset

  For an odd number of values the middle element of the sorted data is
  returned unchanged; for an even number the two middle elements are averaged
  (the result is then a float).

  Raises RuntimeError if xs is empty.
  """
  n=len(xs)
  if n==0:
    raise RuntimeError("Can't calculate median of empty sequence")
  sorted_xs=sorted(xs)
  # Floor division: a plain "/" yields a float under Python 3, which is not a
  # valid list index.
  mid=(n-1)//2
  if n%2==0:
    return (sorted_xs[mid]+sorted_xs[mid+1])/2.0
  return sorted_xs[mid]
62
@FloatValueExtract
def StdDev(xs):
  """
  Return the standard deviation of the values in xs, normalised by the number
  of values n (not n-1):

                | sum[(xi-<x>)^2] |
      sigma=sqrt|-----------------|
                |        n        |

  where <x> is the mean of xs. The mean is obtained from Mean(), which raises
  RuntimeError for an empty sequence.
  """
  center=Mean(xs)
  squared_deviations=[(value-center)**2 for value in xs]
  variance=sum(squared_deviations)/len(xs)
  return math.sqrt(variance)
74
@FloatValueExtract
def Min(xs):
  """
  Return the smallest value in xs, as determined by the builtin min().
  """
  smallest=min(xs)
  return smallest
78
@FloatValueExtract
def Max(xs):
  """
  Return the largest value in xs, as determined by the builtin max().
  """
  largest=max(xs)
  return largest
82
def Correl(xs, ys):
  """
  Return the correlation coefficient between the paired datasets xs and ys:

          sum[(xi-<x>)*(yi-<y>)]
      r = ----------------------
                  sx*sy

  where <x> and <y> are the means of xs and ys. In this implementation sx and
  sy are the square roots of the summed squared deviations from the
  respective mean, i.e. sx=sqrt(sum[(xi-<x>)^2]).

  Raises RuntimeError if the two sequences differ in length or contain exactly
  one element each.
  """
  if len(xs)!=len(ys):
    raise RuntimeError("Can't calculate correl. Sequence lengths do not match.")
  if len(xs)==1:
    raise RuntimeError("Can't calculate correl of sequences with length 1.")
  mean_x=Mean(xs)
  mean_y=Mean(ys)
  covariance_sum=0.0
  sq_dev_x=0.0
  sq_dev_y=0.0
  # Accumulate all three sums in a single pass over the paired values.
  for x, y in zip(xs, ys):
    dx=x-mean_x
    dy=y-mean_y
    covariance_sum+=dx*dy
    sq_dev_x+=dx**2
    sq_dev_y+=dy**2
  norm_x=math.sqrt(sq_dev_x)
  norm_y=math.sqrt(sq_dev_y)
  return covariance_sum/(norm_x*norm_y)
109
def Histogram(xs, bounds, num_bins):
  """
  Count how many values of xs fall into each of num_bins equally sized bins.

  :param xs: values to be binned.
  :param bounds: pair (lower, upper) giving the range covered by the bins.
  :param num_bins: number of bins the range is divided into.
  :returns: list of length num_bins with the number of values per bin.

  Bins are half-open: a value equal to the lower edge of a bin belongs to that
  bin, so a value equal to bounds[1] is not counted. Values outside the range
  are ignored.
  """
  bins=[0]*num_bins
  # Number of bins per unit of x, i.e. the inverse bin width.
  d=1.0*num_bins/(bounds[1]-bounds[0])
  for x in xs:
    # floor() rather than int(): int() truncates towards zero and would put
    # values lying just below bounds[0] into the first bin.
    index=int(math.floor((x-bounds[0])*d))
    if index<0 or index>num_bins-1:
      continue
    bins[index]+=1
  return bins
118  return bins