OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 from _ost_seq_alg import *
2 from ost.seq.alg.mat import *
3 
4 def ValidateSEQRESAlignment(aln, chain=None):
5  """
6  Checks if sequence in alignment has same connectivity as residues in chain.
7  This looks for connected stretches in both the sequence and the chain and
8  returns False if they don't match. This uses the connectivity of the protein
9  backbone.
10 
11  :param aln: Alignment of two sequences with second one expected to map to
12  residues in *chain*.
13  :type aln: :class:`~ost.seq.AlignmentHandle`
14  :param chain: Source of the sequence.
15  :type chain: :class:`~ost.mol.ChainHandle`
16 
17  :returns: True if all residues (beside gapped ones) are connected, False
18  otherwise.
19  """
20  from ost import LogWarning
21  from ost import seq
22  from ost import mol
23  if aln.GetCount() != 2:
24  raise ValueError('Alignment contains more than 2 sequences!')
25  sequence = aln.GetSequence(1)
26  if len(sequence) == 0:
27  return True
28  if chain == None:
29  if sequence.HasAttachedView() == False:
30  raise ValueError("Alignment is missing an attached chain view.")
31  chain = sequence.GetAttachedView()
32  residues = chain.residues
33  # eat up all beginning gaps
34  j = 1
35  for s in sequence:
36  if s != '-':
37  break
38  j += 1;
39  l = sequence[j-1]
40  i = 0
41  # run over sequence & alignment
42  for s in sequence[j:]:
43  if s != '-':
44  i += 1
45  r1 = residues[i-1]
46  r2 = residues[i]
47  if r1.one_letter_code=='?' or r2.one_letter_code=='?':
48  continue
49  if l != '-':
50  if not mol.InSequence(r1.handle, r2.handle):
51  LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
52  return False
53  else:
54  if mol.InSequence(r1.handle, r2.handle):
55  LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
56  return False
57  l = s
58  return True
59 
60 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
61  """
62  Aligns the residues of chain to the SEQRES sequence, inserting gaps where
63  needed. The function uses the connectivity of the protein backbone to find
64  consecutive peptide fragments. These fragments are then aligned to the SEQRES
65  sequence.
66 
67  All the non-ligand, peptide-linking residues of the chain must be listed in
68  SEQRES. If there are any additional residues in the chain, the function
69  raises a ValueError.
70 
71  :param chain: Source of the sequence
72  :type chain: :class:`~ost.mol.ChainHandle`
73  :param seqres: SEQRES sequence
74  :type seqres: :class:`str`
75  :param try_resnum_first: If set to True, this first builds an alignment using
76  residue numbers and checks if the one-letter-codes
77  match. If they all match, this alignment is used
78  (and possibly validated). Otherwise, it displays a
79  warning and falls back to the connectivity-based
80  alignment.
81  :type try_resnum_first: :class:`bool`
82  :param validate: If set to True, the alignment is additionally checked by
83  :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
84  a ValueError if the validation failed.
85  :type validate: :class:`bool`
86 
87  :returns: The alignment of the residues in the chain and the SEQRES entries.
88  :rtype: :class:`~ost.seq.AlignmentHandle`
89  """
90 
91  def IsEqual(olc1, olc2):
92  return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
93 
94  from ost import seq
95  from ost import mol
96  from ost import LogWarning
97  view=chain
98  residues=view.residues
99  if len(residues)==0:
100  return seq.CreateAlignment()
101  if try_resnum_first:
102  aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
103  for r1 in residues:
104  if r1.number.num <= len(seqres) and r1.number.num > 0:
105  if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
106  aln_seq[r1.number.num - 1] = r1.one_letter_code
107  else:
108  LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
109  '", while SEQRES is "' + seqres[r1.number.num - 1] +
110  '" at the corresponding position.')
111  try_resnum_first = False
112  break
113  if not try_resnum_first:
114  fragments=[residues[0].one_letter_code]
115  for r1, r2 in zip(residues[:-1], residues[1:]):
116  if not mol.InSequence(r1.handle, r2.handle):
117  fragments.append('')
118  fragments[-1]+=r2.one_letter_code
119  ss=str(seqres)
120  pos=0
121  aln_seq=''
122  for frag in fragments:
123  new_pos=ss.find(frag, pos)
124  if new_pos==-1:
125  raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
126  aln_seq+='-'*(new_pos-pos)+frag
127  pos=new_pos+len(frag)
128  aln_seq = seq.CreateSequence('atoms',
129  aln_seq+('-'*(len(seqres)-len(aln_seq))))
130  alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
131  aln_seq)
132  if validate and not ValidateSEQRESAlignment(alignment, view):
133  raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
134  return alignment
135 
136 
137 def AlignmentFromChainView(chain, handle_seq_name='handle',
138  view_seq_name='view'):
139  """
140  Creates and returns the sequence alignment of the given chain view to the
141  chain handle. The alignment contains two sequences, the first containing all
142  non-ligand peptide-linking residues, the second containing all non-ligand
143  peptide-linking residues that are part of the view.
144 
145  :param chain: A valid chain
146  :type chain: :class:`~ost.mol.ChainView`
147 
148  :param handle_seq_name: Name of the handle sequence in the output alignment
149  :param view_seq_name: Name of the view sequence in the output alignment
150  :returns: The alignment
151  :rtype: :class:`~ost.seq.AlignmentHandle`
152 
153  """
154  from ost import seq
155  v0=chain.handle.Select('ligand=false and peptide=true')
156  v1=chain.Select('ligand=false and peptide=true')
157  s0=seq.CreateSequence(handle_seq_name, '')
158  s1=seq.CreateSequence(view_seq_name, '')
159  s0.AttachView(v0)
160  s1.AttachView(v1)
161  res0=v0.residues
162  res1=v1.residues
163  idx0, idx1=(0, 0)
164  while idx0<len(res0):
165  s0.Append(res0[idx0].one_letter_code)
166  if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
167  s1.Append(res1[idx1].one_letter_code)
168  idx1+=1
169  else:
170  s1.Append('-')
171  idx0+=1
172  return seq.CreateAlignment(s0, s1)
173 
175  """
176  Predicts contacts from a multiple sequence alignment using a combination
177  of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
178  MI is calculated with the APC and small number corrections as well as with a
179  transformation into Z-scores. The *CoEvoSc* is calculated using the default
180  PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
181  The final score for a pair of columns *(i,j)* of **ali** is obtained from:
182 
183  Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
184 
185  Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
186 
187  :param ali: The multiple sequence alignment
188  :type ali: :class:`~ost.seq.AlignmentHandle`
189  """
190  import math
191  from ost import seq
192  if not type(ali)==type(seq.AlignmentHandle()):
193  print "Parameter should be an AlignmentHandle"
194  return
197  ncol=ali.GetLength()
198  for i in range(ncol):
199  for j in range(ncol):
200  if mi.matrix[i][j]>=0:
201  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
202  else:
203  mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
204  mi.RefreshSortedIndices()
205  return mi
206 
207 def CalculateContactProbability(cpred_res,method):
208  """
209  Calculate the probability of a predicted contact to be correct.
210  This simply transforms the score associated with a prediction into a probability.
211 
212  :param cpred_res: A contact prediction result
213  :param method: The method which was used for contact prediction. Should be one
214  of "MI", "MIp", "MIpz", "cevoMI", "cevo"
215 
216  :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
217  :type method: :class:`str`
218  """
219  import math
220  def _growth_function(x,K,x0,B,nu,Q):
221  p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
222  p=min(1,max(0,p))
223  return p
224 
225  def _decay_function(x,A,k):
226  p=A*math.exp(-k*x)
227  p=min(1,max(0,p))
228  return p
229 
230  prob_params={}
231  prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
232  prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
233  prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
234  prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
235  prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
236 
237  if not method in prob_params:
238  raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
239  params=prob_params[method]
240  cpred_res.RefreshSortedIndices()
241  nres=len(cpred_res.matrix)
242  probabilities=[[0 for i in range(nres)] for j in range(nres)]
243 
244  if len(params)==2:func=_decay_function
245  else:func=_growth_function
246 
247  nres=float(nres)
248  if len(params)==2:
249  for k,(i,j) in enumerate(cpred_res.sorted_indices):
250  p=_decay_function(math.log10(0.01+k/nres),*params)
251  probabilities[i][j]=p
252  probabilities[j][i]=p
253  else:
254  for k,(i,j) in enumerate(cpred_res.sorted_indices):
255  p=_growth_function(cpred_res.GetScore(i,j),*params)
256  probabilities[i][j]=p
257  probabilities[j][i]=p
258  cpred_res.probabilities=probabilities
259  return cpred_res
260 
261 
262 
263 
def ValidateSEQRESAlignment
Definition: __init__.py:4
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
def CalculateContactProbability
Definition: __init__.py:207
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
def PredictContacts
Definition: __init__.py:174
def AlignToSEQRES
Definition: __init__.py:60
representation of a multiple sequence alignemnt consisting of two or more sequences ...
def AlignmentFromChainView
Definition: __init__.py:138