OpenStructure
superpose.py
Go to the documentation of this file.
1 """
2 Superposition of structures made simple.
3 
4 Authors: Stefan Bienert
5 """
6 
7 import math
8 import ost.mol.alg
9 import ost.seq.alg
10 
11 def ParseAtomNames(atoms):
12  """
13  Parses different representations of a list of atom names and returns a
14  :class:`set`, understandable by :func:`~ost.mol.alg.MatchResidueByNum`. In
15  essence, this function translates
16 
17  * None to ``None``
18 
19  * 'all' to ``None``
20 
21  * 'backbone' to ``set(['N', 'CA', 'C', 'O'])``
22 
23  * 'aname1, aname2' to ``set(['aname1', 'aname2'])``
24 
25  * ``['aname1', 'aname2']`` to ``set(['aname1', 'aname2'])``
26 
27  :param atoms: Identifier or list of atoms
28  :type atoms: :class:`str`, :class:`list`, :class:`set`
29  :returns: A :class:`set` of atoms.
30  """
31 
32  if atoms==None:
33  return None
34  if isinstance(atoms, str):
35  if atoms.upper()=='ALL':
36  return None
37  if atoms.upper()=='BACKBONE':
38  return set(['N', 'CA', 'C', 'O'])
39 
40  return set([a.strip() for a in atoms.split(',')])
41  return set(atoms)
42 
43 
44 def _EmptyView(view):
45  """
46  for internal use, only
47  """
48  if isinstance(view, ost.mol.EntityHandle):
49  return view.CreateEmptyView()
50  return view.handle.CreateEmptyView()
51 
52 
53 def _fetch_atoms(r_a, r_b, result_a, result_b, atmset):
54  """
55  for internal use, only
56  """
57 
58  for a_a in r_a.GetAtomList():
59  if atmset==None or a_a.name in atmset:
60  a_b = r_b.FindAtom(a_a.name)
61  if a_b.IsValid():
62  result_a.AddAtom(a_a)
63  result_b.AddAtom(a_b)
64  return result_a, result_b
65 
66 
67 def _no_of_chains(ent_a, ent_b):
68  """
69  for internal use, only
70  """
71 
72  if ent_a.chain_count < ent_b.chain_count:
73  return ent_a.chain_count
74  return ent_b.chain_count
75 
76 
77 def MatchResidueByNum(ent_a, ent_b, atoms='all'):
78  """
79  Returns a tuple of views containing exactly the same number of atoms.
80  Residues are matched by residue number. A subset of atoms to be included in
81  the views can be specified in the **atoms** argument. Regardless of what the
82  list of **atoms** says, only those present in two matched residues will be
83  included in the views. Chains are processed in the order they occur in the
84  entities. If **ent_a** and **ent_b** contain a different number of chains,
85  processing stops with the lower count.
86 
87  :param ent_a: The first entity
88  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
89  :param ent_b: The second entity
90  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
91  :param atoms: The subset of atoms to be included in the two views.
92  :type atoms: :class:`str`, :class:`list`, :class:`set`
93  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
94  residues matched by number. Each residue will have the same number
95  & type of atoms.
96  """
97 
98  result_a=_EmptyView(ent_a)
99  result_b=_EmptyView(ent_b)
100  n_chains=_no_of_chains(ent_a, ent_b)
101  atmset=ParseAtomNames(atoms)
102 
103  for i in range(0, n_chains):
104  chain_a=ent_a.chains[i]
105  chain_b=ent_b.chains[i]
106  residues_a=iter(chain_a.residues)
107 
108  if chain_a.InSequence() and chain_b.InSequence():
109  residues_b=iter(chain_b.residues)
110 
111  try:
112  while True:
113  r_a=next(residues_a)
114  r_b=next(residues_b)
115  while r_a.number!=r_b.number:
116  while r_a.number<r_b.number:
117  r_a=next(residues_a)
118  while r_b.number<r_a.number:
119  r_b=next(residues_b)
120  assert r_a.number==r_b.number
121  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
122  except StopIteration:
123  pass
124  else:
125 
126  try:
127  while True:
128  r_a=next(residues_a)
129  r_b=chain_b.FindResidue(r_a.number)
130  if r_b.IsValid():
131  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
132  except StopIteration:
133  pass
134  result_a.AddAllInclusiveBonds()
135  result_b.AddAllInclusiveBonds()
136  return result_a, result_b
137 
138 
139 def MatchResidueByIdx(ent_a, ent_b, atoms='all'):
140  """
141  Returns a tuple of views containing exactly the same number of atoms.
142  Residues are matched by position in the chains of an entity. A subset of
143  atoms to be included in the views can be specified in the **atoms** argument.
144  Regardless of what the list of **atoms** says, only those present in two
145  matched residues will be included in the views. Chains are processed in order
146  of appearance. If **ent_a** and **ent_b** contain a different number of
147  chains, processing stops with the lower count. The number of residues per
148  chain is supposed to be the same.
149 
150  :param ent_a: The first entity
151  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
152  :param ent_b: The second entity
153  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
154  :param atoms: The subset of atoms to be included in the two views.
155  :type atoms: :class:`str`, :class:`list`, :class:`set`
156  :returns: Two :class:`~ost.mol.EntityView` instances with the same amount of
157  residues matched by position. Each residue will have the same number
158  & type of atoms.
159  """
160  not_supported="MatchResidueByIdx has no support for chains of different "\
161  +"lengths"
162 
163  result_a=_EmptyView(ent_a)
164  result_b=_EmptyView(ent_b)
165  n_chains=_no_of_chains(ent_a, ent_b)
166  atmset=ParseAtomNames(atoms)
167 
168  for i in range(0, n_chains):
169  chain_a=ent_a.chains[i]
170  chain_b=ent_b.chains[i]
171 
172  if chain_a.residue_count!=chain_b.residue_count:
173  raise RuntimeError(not_supported)
174 
175  for r_a, r_b in zip(chain_a.residues, chain_b.residues):
176  result_a,result_b=_fetch_atoms(r_a, r_b, result_a, result_b, atmset)
177  result_a.AddAllInclusiveBonds()
178  result_b.AddAllInclusiveBonds()
179  return result_a, result_b
180 
181 
182 def _MatchResidueByAln(ent_a, ent_b, atoms, alnmethod):
183  """
184  For internal use, only
185  """
186 
187  result_a = _EmptyView(ent_a)
188  result_b = _EmptyView(ent_b)
189  n_chains = _no_of_chains(ent_a, ent_b)
190  atmset = ParseAtomNames(atoms)
191 
192  for i in range(0, n_chains):
193 
194  chain_a = ent_a.chains[i]
195  chain_b = ent_b.chains[i]
196  chain_view_a = chain_a.Select('peptide=true')
197  chain_view_b = chain_b.Select('peptide=true')
198  if chain_view_a.chain_count == 0 or chain_view_b.chain_count == 0:
199  # skip empty chains
200  continue
201 
202  s_a = ''.join([r.one_letter_code for r in chain_view_a.residues])
203  s_b = ''.join([r.one_letter_code for r in chain_view_b.residues])
204 
205  seq_a = ost.seq.CreateSequence(chain_a.name, s_a)
206  seq_b = ost.seq.CreateSequence(chain_b.name, s_b)
207  aln_a_b = alnmethod(seq_a, seq_b, ost.seq.alg.BLOSUM62)
208  if not aln_a_b:
209  # skip failed alignments
210  continue
211 
212  max_aln_res = -1
213  for a in range(0, len(aln_a_b)):
214  aln = aln_a_b[a]
215  aln_res_len = 0
216  match_list = list()
217  for i in range(0, aln.GetLength()):
218  if aln.sequences[0][i]!='-' and aln.sequences[1][i]!='-':
219  aln_res_len += 1
220  match_list.append(i)
221  if aln_res_len > max_aln_res:
222  max_aln_res = aln_res_len
223  max_aln_idx = a
224  max_matches = match_list
225 
226  aln = aln_a_b[max_aln_idx]
227 
228  aln.AttachView(0, chain_view_a)
229  aln.AttachView(1, chain_view_b)
230 
231  for i in max_matches:
232  r_a = aln.GetResidue(0,i)
233  r_b = aln.GetResidue(1,i)
234  result_a, result_b = _fetch_atoms(r_a, r_b, result_a, result_b, atmset)
235  result_a.AddAllInclusiveBonds()
236  result_b.AddAllInclusiveBonds()
237  return result_a, result_b
238 
239 def MatchResidueByLocalAln(ent_a, ent_b, atoms='all'):
240  """
241  Match residues by local alignment. Takes **ent_a** and **ent_b**, extracts the
242  sequences chain-wise and aligns them in Smith/Waterman manner using the
243  BLOSUM62 matrix for scoring. Only residues which are marked as :attr:`peptide
244  linking <ost.mol.ResidueHandle.peptide_linking>` are considered for alignment.
245  The residues of the entities are then matched based on this alignment. Only
246  atoms present in both residues are included in the views. Chains are processed
247  in order of appearance. If **ent_a** and **ent_b** contain a different number
248  of chains, processing stops with the lower count.
249 
250  :param ent_a: The first entity
251  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
252  :param ent_b: The second entity
253  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
254  :param atoms: The subset of atoms to be included in the two views.
255  :type atoms: :class:`str`, :class:`list`, :class:`set`
256  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
257  residues. Each residue will have the same number & type of atoms.
258  """
259  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.LocalAlign)
260 
261 def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all'):
262  """
263  Match residues by global alignment.
264  Same as :func:`MatchResidueByLocalAln` but performs a global Needleman/Wunsch
265  alignment of the sequences using the BLOSUM62 matrix for scoring.
266 
267  :param ent_a: The first entity
268  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
269  :param ent_b: The second entity
270  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
271  :param atoms: The subset of atoms to be included in the two views.
272  :type atoms: :class:`str`, :class:`list`, :class:`set`
273  :returns: Two :class:`~ost.mol.EntityView` instances with the same number of
274  residues. Each residue will have the same number & type of atoms.
275  """
276  return _MatchResidueByAln(ent_a, ent_b, atoms, ost.seq.alg.GlobalAlign)
277 
278 
279 def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False,
280  max_iterations=5, distance_threshold=3.0):
281  """
282  Superposes the model entity onto the reference. To do so, two views are
283  created, returned with the result. *atoms* describes what goes into these
284  views and *match* the selection method. For superposition,
285  :func:`SuperposeSVD` or :func:`IterativeSuperposeSVD` are called (depending on
286  *iterative*). For matching, the following methods are recognised:
287 
288  * ``number`` - select residues by residue number, includes *atoms*, calls
289  :func:`MatchResidueByNum`
290 
291  * ``index`` - select residues by index in chain, includes *atoms*, calls
292  :func:`MatchResidueByIdx`
293 
294  * ``local-aln`` - select residues from a Smith/Waterman alignment, includes
295  *atoms*, calls :func:`MatchResidueByLocalAln`
296 
297  * ``global-aln`` - select residues from a Needleman/Wunsch alignment, includes
298  *atoms*, calls :func:`MatchResidueByGlobalAln`
299 
300  :param ent_a: The model entity (superposition transform is applied on full
301  entity handle here)
302  :type ent_a: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
303 
304  :param ent_b: The reference entity
305  :type ent_b: :class:`~ost.mol.EntityView` or :class:`~ost.mol.EntityHandle`
306 
307  :param match: Method to gather residues/ atoms
308  :type match: :class:`str`
309 
310  :param atoms: The subset of atoms to be used in the superposition
311  :type atoms: :class:`str`, :class:`list`, :class:`set`
312 
313  :param iterative: Whether or not to use iterative superpositon.
314  :type iterative: :class:`bool`
315 
316  :param max_iterations: Max. number of iterations for
317  :func:`IterativeSuperposeSVD`
318  (only if *iterative* = True)
319  :type max_iterations: :class:`int`
320 
321  :param distance_threshold: Distance threshold for
322  :func:`IterativeSuperposeSVD`
323  (only if *iterative* = True)
324  :type distance_threshold: :class:`float`
325 
326  :returns: An instance of :class:`SuperpositionResult`.
327  """
328  not_supported = "Superpose called with unsupported matching request."
329 
330  if match.upper() == 'NUMBER':
331  view_a, view_b = MatchResidueByNum(ent_a, ent_b, atoms)
332  elif match.upper() == 'INDEX':
333  view_a, view_b = MatchResidueByIdx(ent_a, ent_b, atoms)
334  elif match.upper() == 'LOCAL-ALN':
335  view_a, view_b = MatchResidueByLocalAln(ent_a, ent_b, atoms)
336  elif match.upper() == 'GLOBAL-ALN':
337  view_a, view_b = MatchResidueByGlobalAln(ent_a, ent_b, atoms)
338  else:
339  raise ValueError(not_supported)
340 
341  if iterative:
342  res = ost.mol.alg.IterativeSuperposeSVD(view_a, view_b, max_iterations,
343  distance_threshold)
344  else:
345  res = ost.mol.alg.SuperposeSVD(view_a, view_b)
346  return res
Protein or molecule.
def MatchResidueByNum(ent_a, ent_b, atoms='all')
Definition: superpose.py:77
def MatchResidueByIdx(ent_a, ent_b, atoms='all')
Definition: superpose.py:139
def MatchResidueByLocalAln(ent_a, ent_b, atoms='all')
Definition: superpose.py:239
def ParseAtomNames(atoms)
Definition: superpose.py:11
def MatchResidueByGlobalAln(ent_a, ent_b, atoms='all')
Definition: superpose.py:261
def Superpose(ent_a, ent_b, match='number', atoms='all', iterative=False, max_iterations=5, distance_threshold=3.0)
Definition: superpose.py:280
SuperpositionResult DLLEXPORT_OST_MOL_ALG SuperposeSVD(const mol::EntityView &ev1, const mol::EntityView &ev2, bool apply_transform)
superposes two entity views
SuperpositionResult DLLEXPORT_OST_MOL_ALG IterativeSuperposeSVD(const mol::EntityView &ev1, const mol::EntityView &ev2, int max_cycles, Real distance_threshold, bool apply_transform)
iterative superposition
SequenceHandle DLLEXPORT_OST_SEQ CreateSequence(const String &name, const String &seq, const String &role="UNKNOWN")