OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
qsscoring.py
Go to the documentation of this file.
1 """
2 Scoring of quaternary structures (QS). The QS scoring is according to the paper
3 by `Bertoni et al. <https://dx.doi.org/10.1038/s41598-017-09654-8>`_.
4 
5 .. note ::
6 
7  Requirements for use:
8 
9  - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
10  and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
11  when executing scripts with ``ost``. Otherwise, you must set this with
12  :func:`~ost.conop.SetDefaultLib`.
13  - ClustalW must be installed (unless you provide chain mappings)
14  - Python modules `numpy` and `scipy` must be installed and available
15  (e.g. use ``pip install scipy numpy``)
16 """
17 # Original authors: Gerardo Tauriello, Martino Bertoni
18 
19 from ost import mol, geom, conop, seq, settings, PushVerbosityLevel
20 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
21 from ost.bindings.clustalw import ClustalW
22 from ost.mol.alg import lDDTScorer
23 from ost.seq.alg.renumber import Renumber
24 import numpy as np
25 from scipy.misc import factorial
26 from scipy.special import binom
27 from scipy.cluster.hierarchy import fclusterdata
28 import itertools
29 
30 ###############################################################################
31 # QS scoring
32 ###############################################################################
33 
34 class QSscoreError(Exception):
35  """Exception to be raised for "acceptable" exceptions in QS scoring.
36 
37  Those are cases we might want to capture for default behavior.
38  """
39  pass
40 
41 class QSscorer:
42  """Object to compute QS scores.
43 
44  Simple usage without any precomputed contacts, symmetries and mappings:
45 
46  .. code-block:: python
47 
48  import ost
49  from ost.mol.alg import qsscoring
50 
51  # load two biounits to compare
52  ent_full = ost.io.LoadPDB('3ia3', remote=True)
53  ent_1 = ent_full.Select('cname=A,D')
54  ent_2 = ent_full.Select('cname=B,C')
55  # get score
56  ost.PushVerbosityLevel(3)
57  try:
58  qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
59  ost.LogScript('QSscore:', str(qs_scorer.global_score))
60  ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
61  # commonly you want the QS global score as output
62  qs_score = qs_scorer.global_score
63  except qsscoring.QSscoreError as ex:
64  # default handling: report failure and set score to 0
65  ost.LogError('QSscore failed:', str(ex))
66  qs_score = 0
67 
68  For maximal performance when computing QS scores of the same entity with many
69  others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
70 
71  Any known / precomputed information can be filled into the appropriate
72  attribute here (no checks done!). Otherwise most quantities are computed on
73  first access and cached (lazy evaluation). Setters are provided to set values
74  with extra checks (e.g. :func:`SetSymmetries`).
75 
76  All necessary seq. alignments are done by global BLOSUM62-based alignment. A
77  multiple sequence alignment is performed with ClustalW unless
78  :attr:`chain_mapping` is provided manually. You will need to have an
79  executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
80  :attr:`clustalw_bin` accordingly. Otherwise an exception
81  (:class:`ost.settings.FileNotFound`) is thrown.
82 
83  Formulas for QS scores:
84 
85  ::
86 
87  - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
88  - QS_global = weighted_scores / (weight_sum + weight_extra_all)
89  -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
90  -> weight_sum = sum(w(min(d1,d2))) for shared
91  -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
92  -> weight_extra_all = sum(w(d)) for all non-shared
93  -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
94 
95  In the formulas above:
96 
97  * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
98  "shared" contacts).
99  * "mapped": we could map chains of two structures and align residues in
100  :attr:`alignments`.
101  * "shared": pairs of residues which are "mapped" and have
102  "inter-chain contact" in both structures.
103  * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
104  (fallback to CA-CA if :attr:`calpha_only` is True).
105  * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
106  from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
107 
108  :param ent_1: First structure to be scored.
109  :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
110  :class:`~ost.mol.EntityView`
111  :param ent_2: Second structure to be scored.
112  :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
113  :class:`~ost.mol.EntityView`
114  :param res_num_alignment: Sets :attr:`res_num_alignment`
115 
116  :raises: :class:`QSscoreError` if input structures are invalid or are monomers
117  or have issues that make it impossible for a QS score to be computed.
118 
119  .. attribute:: qs_ent_1
120 
121  :class:`QSscoreEntity` object for *ent_1* given at construction.
122  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
123  set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
124 
125  .. attribute:: qs_ent_2
126 
127  :class:`QSscoreEntity` object for *ent_2* given at construction.
128  If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
129  set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
130 
131  .. attribute:: calpha_only
132 
133  True if any of the two structures is CA-only (after cleanup).
134 
135  :type: :class:`bool`
136 
137  .. attribute:: max_ca_per_chain_for_cm
138 
139  Maximal number of CA atoms to use in each chain to determine chain mappings.
140  Setting this to -1 disables the limit. Limiting it speeds up determination
141  of symmetries and chain mappings. By default it is set to 100.
142 
143  :type: :class:`int`
144 
145  .. attribute:: max_mappings_extensive
146 
147  Maximal number of chain mappings to test for 'extensive'
148  :attr:`chain_mapping_scheme`. The extensive chain mapping search must in the
149  worst case check O(N^2) * O(N!) possible mappings for complexes with N
150  chains. Two octamers without symmetry would require 322560 mappings to be
151  checked. To limit computations, a :class:`QSscoreError` is thrown if we try
152  more than the maximal number of chain mappings.
153  The value must be set before the first use of :attr:`chain_mapping`.
154  By default it is set to 100000.
155 
156  :type: :class:`int`
157 
158  .. attribute:: res_num_alignment
159 
160  Forces each alignment in :attr:`alignments` to be based on residue numbers
161  instead of using a global BLOSUM62-based alignment.
162 
163  :type: :class:`bool`
164  """
165  def __init__(self, ent_1, ent_2, res_num_alignment=False):
166  # generate QSscoreEntity objects?
167  if isinstance(ent_1, QSscoreEntity):
168  self.qs_ent_1 = ent_1
169  else:
170  self.qs_ent_1 = QSscoreEntity(ent_1)
171  if isinstance(ent_2, QSscoreEntity):
172  self.qs_ent_2 = ent_2
173  else:
174  self.qs_ent_2 = QSscoreEntity(ent_2)
175  # check validity of inputs
176  if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
177  raise QSscoreError("Invalid input in QSscorer!")
178  # set names for structures
179  if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
180  self.qs_ent_1.SetName('pdb_1')
181  self.qs_ent_2.SetName('pdb_2')
182  else:
183  self.qs_ent_1.SetName(self.qs_ent_1.original_name)
184  self.qs_ent_2.SetName(self.qs_ent_2.original_name)
185  # set other public attributes
186  self.res_num_alignment = res_num_alignment
187  self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
190  # init cached stuff
191  self._chem_mapping = None
192  self._ent_to_cm_1 = None
193  self._ent_to_cm_2 = None
194  self._symm_1 = None
195  self._symm_2 = None
196  self._chain_mapping = None
197  self._chain_mapping_scheme = None
198  self._alignments = None
199  self._mapped_residues = None
200  self._global_score = None
201  self._best_score = None
202  self._superposition = None
203  self._clustalw_bin = None
204 
205  @property
206  def chem_mapping(self):
207  """Inter-complex mapping of chemical groups.
208 
209  Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
210  highest sequence identity. Alignment is between longest sequences in groups.
211 
212  Limitations:
213 
214  - If different numbers of groups, we map only the groups for the complex
215  with less groups (rest considered unmapped and shown as warning)
216  - The mapping is forced: the "best" mapping will be chosen independently of
217  how low the seq. identity may be
218 
219  :getter: Computed on first use (cached)
220  :type: :class:`dict` with key = :class:`tuple` of chain names in
221  :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
222  :attr:`qs_ent_2`.
223 
224  :raises: :class:`QSscoreError` if we end up having no chains for either
225  entity in the mapping (can happen if chains do not have CA atoms).
226  """
227  if self._chem_mapping is None:
228  self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2)
229  return self._chem_mapping
230 
231  @property
232  def ent_to_cm_1(self):
233  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
234 
235  Properties:
236 
237  - Includes only residues aligned according to :attr:`chem_mapping`
238  - Includes only 1 CA atom per residue
239  - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
240  - All chains of the same chemical group have the same number of atoms
241  (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
242  - All chains appearing in :attr:`chem_mapping` appear in this entity
243  (so the two can be safely used together)
244 
245  This entity might be transformed (i.e. all positions rotated/translated by
246  same transformation matrix) if this can speed up computations. So do not
247  assume fixed global positions (but relative distances will remain fixed).
248 
249  :getter: Computed on first use (cached)
250  :type: :class:`~ost.mol.EntityHandle`
251 
252  :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
253  """
254  if self._ent_to_cm_1 is None:
256  return self._ent_to_cm_1
257 
258  @property
259  def ent_to_cm_2(self):
260  """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
261  (see :attr:`ent_to_cm_1` for details).
262  """
263  if self._ent_to_cm_2 is None:
265  return self._ent_to_cm_2
266 
267  @property
268  def symm_1(self):
269  """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
270 
271  This is a list of chain-lists where each chain-list can be used reconstruct
272  the others via cyclic C or dihedral D symmetry. The first chain-list is used
273  as a representative symmetry group. For heteromers, the group-members must
274  contain all different seqres in oligomer.
275 
276  Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
277  symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
278 
279  Properties:
280 
281  - All symmetry group tuples have the same length (num. of chains)
282  - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
283  - For heteros: symmetry group tuples have all different chem. groups
284  - Trivial symmetry group = one tuple with all chains (used if inconsistent
285  data provided or if no symmetry is found)
286  - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
287  Compatibility requires same lengths of symmetry group tuples and it must
288  be possible to get an overlap (80% of residues covered within 6 A of a
289  (chem. mapped) chain) of all chains in representative symmetry groups by
290  superposing one pair of chains.
291 
292  :getter: Computed on first use (cached)
293  :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
294  """
295  if self._symm_1 is None:
296  self._ComputeSymmetry()
297  return self._symm_1
298 
299  @property
300  def symm_2(self):
301  """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
302  if self._symm_2 is None:
303  self._ComputeSymmetry()
304  return self._symm_2
305 
306  def SetSymmetries(self, symm_1, symm_2):
307  """Set user-provided symmetry groups.
308 
309  These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
310  and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
311  chains and both *symm_1* and *symm_2* have same lengths of symmetry group
312  tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
313 
314  :param symm_1: Value to set for :attr:`symm_1`.
315  :param symm_2: Value to set for :attr:`symm_2`.
316  """
317  # restrict chain names
318  self._symm_1 = _CleanUserSymmetry(symm_1, self.ent_to_cm_1)
319  self._symm_2 = _CleanUserSymmetry(symm_2, self.ent_to_cm_2)
320  # check that we have reasonable symmetries set (fallback: all chains)
321  if not _AreValidSymmetries(self._symm_1, self._symm_2):
322  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
323  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
324 
325  @property
326  def chain_mapping(self):
327  """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
328 
329  Properties:
330 
331  - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
332  - Each chain can appear only once in mapping
333  - All chains of complex with less chains are mapped
334  - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
335 
336  Details on algorithms used to find mapping:
337 
338  - We try all pairs of chem. mapped chains within symmetry group and get
339  superpose-transformation for them
340  - First option: check for "sufficient overlap" of other chain-pairs
341 
342  - For each chain-pair defined above: apply superposition to full oligomer
343  and map chains based on structural overlap
344  - Structural overlap = X% of residues in second oligomer covered within Y
345  Angstrom of a (chem. mapped) chain in first oligomer. We successively
346  try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
347  mapping (warning shown for most permissive one).
348  - If multiple possible mappings are found, we choose the one which leads
349  to the lowest multi-chain-RMSD given the superposition
350 
351  - Fallback option: try all mappings to find minimal multi-chain-RMSD
352  (warning shown)
353 
354  - For each chain-pair defined above: apply superposition, try all (!)
355  possible chain mappings (within symmetry group) and keep mapping with
356  lowest multi-chain-RMSD
357  - Repeat procedure above to resolve symmetry. Within the symmetry group we
358  can use the chain mapping computed before and we just need to find which
359  symmetry group in first oligomer maps to which in the second one. We
360  again try all possible combinations...
361  - Limitations:
362 
363  - Trying all possible mappings is a combinatorial nightmare (factorial).
364  We throw an exception if too many combinations (e.g. octomer vs
365  octomer with no usable symmetry)
366  - The mapping is forced: the "best" mapping will be chosen independently
367  of how badly they fit in terms of multi-chain-RMSD
368  - As a result, such a forced mapping can lead to a large range of
369  resulting QS scores. An extreme example was observed between 1on3.1
370  and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
371  for mappings with very similar multi-chain-RMSD.
372 
373  :getter: Computed on first use (cached)
374  :type: :class:`dict` with key / value = :class:`str` (chain names, key
375  for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
376  :raises: :class:`QSscoreError` if there are too many combinations to check
377  to find a chain mapping (see :attr:`max_mappings_extensive`).
378  """
379  if self._chain_mapping is None:
381  _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2, self.symm_1,
382  self.symm_2, self.chem_mapping,
384  LogInfo('Mapping found: %s' % str(self._chain_mapping))
385  return self._chain_mapping
386 
387  @property
389  """Mapping scheme used to get :attr:`chain_mapping`.
390 
391  Possible values:
392 
393  - 'strict': 80% overlap needed within 4 Angstrom (overlap based mapping).
394  - 'tolerant': 40% overlap needed within 6 Angstrom (overlap based mapping).
395  - 'permissive': 20% overlap needed within 8 Angstrom (overlap based
396  mapping). It's best if you check mapping manually!
397  - 'extensive': Extensive search used for mapping detection (fallback). This
398  approach has known limitations and may be removed in future versions.
399  Mapping should be checked manually!
400  - 'user': :attr:`chain_mapping` was set by user before first use of this
401  attribute.
402 
403  :getter: Computed with :attr:`chain_mapping` on first use (cached)
404  :type: :class:`str`
405  :raises: :class:`QSscoreError` as in :attr:`chain_mapping`.
406  """
407  if self._chain_mapping_scheme is None:
408  # default: user provided
409  self._chain_mapping_scheme = 'user'
410  # get chain mapping and make sure internal variable is set
411  # -> will not compute and only update _chain_mapping if user provided
412  # -> will compute and overwrite _chain_mapping_scheme else
413  self._chain_mapping = self.chain_mapping
414  return self._chain_mapping_scheme
415 
416  @property
417  def alignments(self):
418  """List of successful sequence alignments using :attr:`chain_mapping`.
419 
420  There will be one alignment for each mapped chain and they are ordered by
421  their chain names in :attr:`qs_ent_1`.
422 
423  The first sequence of each alignment belongs to :attr:`qs_ent_1` and the
424  second one to :attr:`qs_ent_2`. The sequences are named according to the
425  mapped chain names and have views attached into :attr:`QSscoreEntity.ent`
426  of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
427 
428  If :attr:`res_num_alignment` is False, each alignment is performed using a
429  global BLOSUM62-based alignment. Otherwise, the positions in the alignment
430  sequences are simply given by the residue number so that residues with
431  matching numbers are aligned.
432 
433  :getter: Computed on first use (cached)
434  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
435  """
436  if self._alignments is None:
437  self._alignments = _GetMappedAlignments(self.qs_ent_1.ent,
438  self.qs_ent_2.ent,
439  self.chain_mapping,
440  self.res_num_alignment)
441  return self._alignments
442 
443  @property
444  def mapped_residues(self):
445  """Mapping of shared residues in :attr:`alignments`.
446 
447  :getter: Computed on first use (cached)
448  :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
449  *c1* = Chain name in first entity (= first sequence in aln),
450  *r1* = Residue number in first entity,
451  *r2* = Residue number in second entity
452  """
453  if self._mapped_residues is None:
454  self._mapped_residues = _GetMappedResidues(self.alignments)
455  return self._mapped_residues
456 
457  @property
458  def global_score(self):
459  """QS-score with penalties.
460 
461  The range of the score is between 0 (i.e. no interface residues are shared
462  between biounits) and 1 (i.e. the interfaces are identical).
463 
464  The global QS-score is computed applying penalties when interface residues
465  or entire chains are missing (i.e. anything that is not mapped in
466  :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
467 
468  :getter: Computed on first use (cached)
469  :type: :class:`float`
470  :raises: :class:`QSscoreError` if only one chain is mapped
471  """
472  if self._global_score is None:
473  self._ComputeScores()
474  return self._global_score
475 
476  @property
477  def best_score(self):
478  """QS-score without penalties.
479 
480  Like :attr:`global_score`, but neglecting additional residues or chains in
481  one of the biounits (i.e. the score is calculated considering only mapped
482  chains and residues).
483 
484  :getter: Computed on first use (cached)
485  :type: :class:`float`
486  :raises: :class:`QSscoreError` if only one chain is mapped
487  """
488  if self._best_score is None:
489  self._ComputeScores()
490  return self._best_score
491 
492  @property
493  def superposition(self):
494  """Superposition result based on shared CA atoms in :attr:`alignments`.
495 
496  The superposition can be used to map :attr:`QSscoreEntity.ent` of
497  :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
498  :func:`ost.geom.Invert` if you need the opposite transformation.
499 
500  :getter: Computed on first use (cached)
501  :type: :class:`ost.mol.alg.SuperpositionResult`
502  """
503  if self._superposition is None:
504  self._superposition = _GetQsSuperposition(self.alignments)
505  # report it
506  sup_rmsd = self._superposition.rmsd
507  cmp_view = self._superposition.view1
508  LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
509  % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
510  return self._superposition
511 
512  @property
513  def clustalw_bin(self):
514  """
515  Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
516  sequence alignments (unless :attr:`chain_mapping` is provided manually).
517 
518  :getter: Located in path on first use (cached)
519  :type: :class:`str`
520  """
521  if self._clustalw_bin is None:
522  self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
523  return self._clustalw_bin
524 
525  def GetOligoLDDTScorer(self, settings, penalize_extra_chains=True):
526  """
527  :return: :class:`OligoLDDTScorer` object, setup for this QS scoring problem.
528  The scorer is set up with :attr:`qs_ent_1` as the reference and
529  :attr:`qs_ent_2` as the model.
530  :param settings: Passed to :class:`OligoLDDTScorer` constructor.
531  :param penalize_extra_chains: Passed to :class:`OligoLDDTScorer` constructor.
532  """
533  if penalize_extra_chains:
534  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
535  self.alignments, self.calpha_only, settings,
536  True, self.chem_mapping)
537  else:
538  return OligoLDDTScorer(self.qs_ent_1.ent, self.qs_ent_2.ent,
539  self.alignments, self.calpha_only, settings, False)
540 
541 
542  ##############################################################################
543  # Class internal helpers (anything that doesnt easily work without this class)
544  ##############################################################################
545 
546  def _ComputeAlignedEntities(self):
547  """Fills cached ent_to_cm_1 and ent_to_cm_2."""
548  # get aligned residues via MSA
549  ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
550  self.chem_mapping,
552  self.clustalw_bin)
553  # extract new entities
554  self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
555  self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
556  # name them
557  self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
558  self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
559 
560  def _ComputeSymmetry(self):
561  """Fills cached symm_1 and symm_2."""
562  # get them
563  self._symm_1, self._symm_2 = \
564  _FindSymmetry(self.qs_ent_1, self.qs_ent_2, self.ent_to_cm_1,
565  self.ent_to_cm_2, self.chem_mapping)
566  # check that we have reasonable symmetries set (fallback: all chains)
567  if not _AreValidSymmetries(self._symm_1, self._symm_2):
568  self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
569  self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
570 
571  def _ComputeScores(self):
572  """Fills cached global_score and best_score."""
573  if len(self.chain_mapping) < 2:
574  raise QSscoreError("QS-score is not defined for monomers")
575  # get contacts
576  if self.calpha_only:
577  contacts_1 = self.qs_ent_1.contacts_ca
578  contacts_2 = self.qs_ent_2.contacts_ca
579  else:
580  contacts_1 = self.qs_ent_1.contacts
581  contacts_2 = self.qs_ent_2.contacts
582  # get scores
583  scores = _GetScores(contacts_1, contacts_2, self.mapped_residues,
584  self.chain_mapping)
585  self._best_score = scores[0]
586  self._global_score = scores[1]
587  # report scores
588  LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
589  % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
590  self._best_score, self._global_score))
591 
592 
593 ###############################################################################
594 # Entity with cached entries for QS scoring
595 ###############################################################################
596 
597 class QSscoreEntity(object):
598  """Entity with cached entries for QS scoring.
599 
600  Any known / precomputed information can be filled into the appropriate
601  attribute here as long as they are labelled as read/write. Otherwise the
602  quantities are computed on first access and cached (lazy evaluation). The
603  heaviest load is expected when computing :attr:`contacts` and
604  :attr:`contacts_ca`.
605 
606  :param ent: Entity to be used for QS scoring. A copy of it will be processed.
607  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
608 
609  .. attribute:: is_valid
610 
611  True, if successfully initialized. False, if input structure has no protein
612  chains with >= 20 residues.
613 
614  :type: :class:`bool`
615 
616  .. attribute:: original_name
617 
618  Name set for *ent* when object was created.
619 
620  :type: :class:`str`
621 
622  .. attribute:: ent
623 
624  Cleaned version of *ent* passed at construction. Hydrogens are removed, the
625  entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
626  listed in :attr:`removed_chains` have been removed. The name of this entity
627  might change during scoring (see :func:`GetName`). Otherwise, this will be
628  fixed.
629 
630  :type: :class:`~ost.mol.EntityHandle`
631 
632  .. attribute:: removed_chains
633 
634  Chains removed from *ent* passed at construction. These are ligand and water
635  chains as well as small (< 20 res.) peptides or chains with no amino acids
636  (determined by chem. type, which is set by rule based processor).
637 
638  :type: :class:`list` of :class:`str`
639 
640  .. attribute:: calpha_only
641 
642  Whether entity is CA-only (i.e. it has 0 CB atoms)
643 
644  :type: :class:`bool`
645  """
646  def __init__(self, ent):
647  # copy entity and process/clean it
648  self.original_name = ent.GetName()
649  ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
650  if not conop.GetDefaultLib():
651  raise RuntimeError("QSscore computation requires a compound library!")
652  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
653  pr.Process(ent)
654  self.ent, self.removed_chains, self.calpha_only = _CleanInputEntity(ent)
655  # check if it's suitable for QS scoring
656  if self.ent.chain_count == 0:
657  LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
658  'removing water, ligands and small peptides.')
659  self.is_valid = False
660  elif self.ent.chain_count == 1:
661  LogWarning('Structure ' + ent.GetName() + ' is a monomer.')
662  self.is_valid = True
663  else:
664  self.is_valid = True
665  # init cached stuff
666  self._ca_entity = None
667  self._ca_chains = None
668  self._alignments = {}
669  self._chem_groups = None
670  self._angles = {}
671  self._axis = {}
672  self._contacts = None
673  self._contacts_ca = None
674 
675  def GetName(self):
676  """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
677  This is used to uniquely identify the entity while scoring. The name may
678  therefore change while :attr:`original_name` remains fixed.
679  """
680  # for duck-typing and convenience
681  return self.ent.GetName()
682 
683  def SetName(self, new_name):
684  """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
685  Use this to change unique identifier while scoring (see :func:`GetName`).
686  """
687  # for duck-typing and convenience
688  self.ent.SetName(new_name)
689 
690  @property
691  def ca_entity(self):
692  """
693  Reduced representation of :attr:`ent` with only CA atoms.
694  This guarantees that each included residue has exactly one atom.
695 
696  :getter: Computed on first use (cached)
697  :type: :class:`~ost.mol.EntityHandle`
698  """
699  if self._ca_entity is None:
700  self._ca_entity = _GetCAOnlyEntity(self.ent)
701  return self._ca_entity
702 
703  @property
704  def ca_chains(self):
705  """
706  Map of chain names in :attr:`ent` to sequences with attached view to CA-only
707  chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
708 
709  :getter: Computed on first use (cached)
710  :type: :class:`dict` (key = :class:`str`,
711  value = :class:`~ost.seq.SequenceHandle`)
712  """
713  if self._ca_chains is None:
714  self._ca_chains = dict()
715  ca_entity = self.ca_entity
716  for ch in ca_entity.chains:
717  self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
718  return self._ca_chains
719 
720  def GetAlignment(self, c1, c2):
721  """Get sequence alignment of chain *c1* with chain *c2*.
722  Computed on first use based on :attr:`ca_chains` (cached).
723 
724  :param c1: Chain name for first chain to align.
725  :type c1: :class:`str`
726  :param c2: Chain name for second chain to align.
727  :type c2: :class:`str`
728  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
729  """
730  if (c1,c2) not in self._alignments:
731  ca_chains = self.ca_chains
732  self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
733  return self._alignments[(c1,c2)]
734 
735  @property
736  def chem_groups(self):
737  """
738  Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
739  chains as extracted from :attr:`ca_chains`. First chain in group is the one
740  with the longest sequence.
741 
742  :getter: Computed on first use (cached)
743  :type: :class:`list` of :class:`list` of :class:`str` (chain names)
744  """
745  if self._chem_groups is None:
746  self._chem_groups = _GetChemGroups(self, 95)
747  LogInfo('Chemically equivalent chain-groups in %s: %s' \
748  % (self.GetName(), str(self._chem_groups)))
749  return self._chem_groups
750 
751  def GetAngles(self, c1, c2):
752  """Get Euler angles from superposition of chain *c1* with chain *c2*.
753  Computed on first use based on :attr:`ca_chains` (cached).
754 
755  :param c1: Chain name for first chain to superpose.
756  :type c1: :class:`str`
757  :param c2: Chain name for second chain to superpose.
758  :type c2: :class:`str`
759  :return: 3 Euler angles (may contain nan if something fails).
760  :rtype: :class:`numpy.array`
761  """
762  if (c1,c2) not in self._angles:
763  self._GetSuperposeData(c1, c2)
764  return self._angles[(c1,c2)]
765 
766  def GetAxis(self, c1, c2):
767  """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
768  Computed on first use based on :attr:`ca_chains` (cached).
769 
770  :param c1: Chain name for first chain to superpose.
771  :type c1: :class:`str`
772  :param c2: Chain name for second chain to superpose.
773  :type c2: :class:`str`
774  :return: Rotational axis (may contain nan if something fails).
775  :rtype: :class:`numpy.array`
776  """
777  if (c1,c2) not in self._axis:
778  self._GetSuperposeData(c1, c2)
779  return self._axis[(c1,c2)]
780 
781  @property
782  def contacts(self):
783  """
784  Connectivity dictionary (**read/write**).
785  As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
786 
787  :getter: Computed on first use (cached)
788  :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
789  for chains in the cleaned entity.
790  :type: See return type of :func:`GetContacts`
791  """
792  if self._contacts is None:
793  self._contacts = GetContacts(self.ent, False)
794  return self._contacts
795 
796  @contacts.setter
797  def contacts(self, new_contacts):
798  chain_names = set([ch.name for ch in self.ent.chains])
799  self._contacts = FilterContacts(new_contacts, chain_names)
800 
801  @property
802  def contacts_ca(self):
803  """
804  CA-only connectivity dictionary (**read/write**).
805  Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
806  """
807  if self._contacts_ca is None:
808  self._contacts_ca = GetContacts(self.ent, True)
809  return self._contacts_ca
810 
811  @contacts_ca.setter
812  def contacts_ca(self, new_contacts):
813  chain_names = set([ch.name for ch in self.ent.chains])
814  self._contacts_ca = FilterContacts(new_contacts, chain_names)
815 
816  ##############################################################################
817  # Class internal helpers (anything that doesnt easily work without this class)
818  ##############################################################################
819 
820  def _GetSuperposeData(self, c1, c2):
821  """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
822  # get aligned views (must contain identical numbers of atoms!)
823  aln = self.GetAlignment(c1, c2)
824  if not aln:
825  # fallback for non-aligned stuff (nan)
826  self._angles[(c1,c2)] = np.empty(3) * np.nan
827  self._axis[(c1,c2)] = np.empty(3) * np.nan
828  return
829  v1, v2 = seq.ViewsFromAlignment(aln)
830  if v1.atom_count < 3:
831  # fallback for non-aligned stuff (nan)
832  self._angles[(c1,c2)] = np.empty(3) * np.nan
833  self._axis[(c1,c2)] = np.empty(3) * np.nan
834  return
835  # get transformation
836  sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
837  Rt = sup_res.transformation
838  # extract angles
839  a,b,c = _GetAngles(Rt)
840  self._angles[(c1,c2)] = np.asarray([a,b,c])
841  # extract axis of symmetry
842  R3 = geom.Rotation3(Rt.ExtractRotation())
843  self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
844 
845 ###############################################################################
846 # Contacts computations
847 ###############################################################################
848 
849 def FilterContacts(contacts, chain_names):
850  """Filter contacts to contain only contacts for chains in *chain_names*.
851 
852  :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
853  :type contacts: :class:`dict`
854  :param chain_names: Chain names to keep.
855  :type chain_names: :class:`list` or (better) :class:`set`
856  :return: New connectivity dictionary (format as in :func:`GetContacts`)
857  :rtype: :class:`dict`
858  """
859  # create new dict with subset
860  filtered_contacts = dict()
861  for c1 in contacts:
862  if c1 in chain_names:
863  new_contacts = dict()
864  for c2 in contacts[c1]:
865  if c2 in chain_names:
866  new_contacts[c2] = contacts[c1][c2]
867  # avoid adding empty dicts
868  if new_contacts:
869  filtered_contacts[c1] = new_contacts
870  return filtered_contacts
871 
872 def GetContacts(entity, calpha_only, dist_thr=12.0):
873  """Get inter-chain contacts of a macromolecular entity.
874 
875  Contacts are pairs of residues within a given distance belonging to different
876  chains. They are stored once per pair and include the CA/CB-CA/CB distance.
877 
878  :param entity: An entity to check connectivity for.
879  :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
880  :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
881  unless the residue is a GLY.
882  :type calpha_only: :class:`bool`
883  :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
884  :type dist_thr: :class:`float`
885  :return: A connectivity dictionary. A pair of residues with chain names
886  *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
887  *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
888  stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
889  :rtype: :class:`dict`
890  """
891  # get ent copy to search on
892  if calpha_only:
893  ev = entity.Select("aname=CA")
894  else:
895  ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
896  ent = mol.CreateEntityFromView(ev, True)
897  # search all vs all
898  contacts = dict()
899  for atom in ent.atoms:
900  ch_name1 = atom.chain.name
901  res_num1 = atom.residue.number.num
902  close_atoms = ent.FindWithin(atom.pos, dist_thr)
903  for close_atom in close_atoms:
904  ch_name2 = close_atom.chain.name
905  if ch_name2 > ch_name1:
906  res_num2 = close_atom.residue.number.num
907  dist = geom.Distance(atom.pos, close_atom.pos)
908  # add to contacts
909  if ch_name1 not in contacts:
910  contacts[ch_name1] = dict()
911  if ch_name2 not in contacts[ch_name1]:
912  contacts[ch_name1][ch_name2] = dict()
913  if res_num1 not in contacts[ch_name1][ch_name2]:
914  contacts[ch_name1][ch_name2][res_num1] = dict()
915  contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
916  # DONE
917  return contacts
918 
919 ###############################################################################
920 # Oligo-lDDT scores
921 ###############################################################################
922 
923 class OligoLDDTScorer(object):
924  """Helper class to calculate oligomeric lDDT scores.
925 
926  This class can be used independently, but commonly it will be created by
927  calling :func:`QSscorer.GetOligoLDDTScorer`.
928 
929  .. note::
930 
931  By construction, lDDT scores are not symmetric and hence it matters which
932  structure is the reference (:attr:`ref`) and which one is the model
933  (:attr:`mdl`). Extra residues in the model are generally not considered.
934  Extra chains in both model and reference can be considered by setting the
935  :attr:`penalize_extra_chains` flag to True.
936 
937  :param ref: Sets :attr:`ref`
938  :param mdl: Sets :attr:`mdl`
939  :param alignments: Sets :attr:`alignments`
940  :param calpha_only: Sets :attr:`calpha_only`
941  :param settings: Sets :attr:`settings`
942  :param penalize_extra_chains: Sets :attr:`penalize_extra_chains`
943  :param chem_mapping: Sets :attr:`chem_mapping`. Must be given if
944  *penalize_extra_chains* is True.
945 
946  .. attribute:: ref
947  mdl
948 
949  Full reference/model entity to be scored. The entity must contain all chains
950  mapped in :attr:`alignments` and may also contain additional ones which are
951  considered if :attr:`penalize_extra_chains` is True.
952 
953  :type: :class:`~ost.mol.EntityHandle`
954 
955  .. attribute:: alignments
956 
957  One alignment for each mapped chain of :attr:`ref`/:attr:`mdl` as defined in
958  :attr:`QSscorer.alignments`. The first sequence of each alignment belongs to
959  :attr:`ref` and the second one to :attr:`mdl`. Sequences must have sequence
960  naming and attached views as defined in :attr:`QSscorer.alignments`.
961 
962  :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
963 
964  .. attribute:: calpha_only
965 
966  If True, restricts lDDT score to CA only.
967 
968  :type: :class:`bool`
969 
970  .. attribute:: settings
971 
972  Settings to use for lDDT scoring.
973 
974  :type: :class:`~ost.mol.alg.lDDTSettings`
975 
976  .. attribute:: penalize_extra_chains
977 
978  If True, extra chains in both :attr:`ref` and :attr:`mdl` will penalize the
979  lDDT scores.
980 
981  :type: :class:`bool`
982 
983  .. attribute:: chem_mapping
984 
985  Inter-complex mapping of chemical groups as defined in
986  :attr:`QSscorer.chem_mapping`. Used to find "chem-mapped" chains in
987  :attr:`ref` for unmapped chains in :attr:`mdl` when penalizing scores.
988  Each unmapped model chain can add extra reference-contacts according to the
989  average total contacts of each single "chem-mapped" reference chain. If
990  there is no "chem-mapped" reference chain, a warning is shown and the model
991  chain is ignored.
992 
993 
994  Only relevant if :attr:`penalize_extra_chains` is True.
995 
996  :type: :class:`dict` with key = :class:`tuple` of chain names in
997  :attr:`ref` and value = :class:`tuple` of chain names in
998  :attr:`mdl`.
999  """
1000 
1001  # NOTE: one could also allow computation of both penalized and unpenalized
1002  # in same object -> must regenerate lddt_ref / lddt_mdl though
1003 
1004  def __init__(self, ref, mdl, alignments, calpha_only, settings,
1005  penalize_extra_chains=False, chem_mapping=None):
1006  # sanity checks
1007  if chem_mapping is None and penalize_extra_chains:
1008  raise RuntimeError("Must provide chem_mapping when requesting penalty "
1009  "for extra chains!")
1010  if not penalize_extra_chains:
1011  # warn for unmapped model chains
1012  unmapped_mdl_chains = self._GetUnmappedMdlChains(mdl, alignments)
1013  if unmapped_mdl_chains:
1014  LogWarning('MODEL contains chains unmapped to REFERENCE, '
1015  'lDDT is not considering MODEL chains %s' \
1016  % str(list(unmapped_mdl_chains)))
1017  # warn for unmapped reference chains
1018  ref_chains = set(ch.name for ch in ref.chains)
1019  mapped_ref_chains = set(aln.GetSequence(0).GetName() for aln in alignments)
1020  unmapped_ref_chains = (ref_chains - mapped_ref_chains)
1021  if unmapped_ref_chains:
1022  LogWarning('REFERENCE contains chains unmapped to MODEL, '
1023  'lDDT is not considering REFERENCE chains %s' \
1024  % str(list(unmapped_ref_chains)))
1025  # prepare fields
1026  self.ref = ref
1027  self.mdl = mdl
1028  self.alignments = alignments
1029  self.calpha_only = calpha_only
1030  self.settings = settings
1031  self.penalize_extra_chains = penalize_extra_chains
1032  self.chem_mapping = chem_mapping
1033  self._sc_lddt = None
1034  self._oligo_lddt = None
1035  self._weighted_lddt = None
1036  self._lddt_ref = None
1037  self._lddt_mdl = None
1038  self._oligo_lddt_scorer = None
1039  self._mapped_lddt_scorers = None
1040  self._ref_scorers = None
1041  self._model_penalty = None
1042 
1043  @property
1044  def oligo_lddt(self):
1045  """Oligomeric lDDT score.
1046 
1047  The score is computed as conserved contacts divided by the total contacts
1048  in the reference using the :attr:`oligo_lddt_scorer`, which uses the full
1049  complex as reference/model structure. If :attr:`penalize_extra_chains` is
1050  True, the reference/model complexes contain all chains (otherwise only the
1051  mapped ones) and additional contacts are added to the reference's total
1052  contacts for unmapped model chains according to the :attr:`chem_mapping`.
1053 
1054  The main difference with :attr:`weighted_lddt` is that the lDDT scorer
1055  "sees" the full complex here (incl. inter-chain contacts), while the
1056  weighted single chain score looks at each chain separately.
1057 
1058  :getter: Computed on first use (cached)
1059  :type: :class:`float`
1060  """
1061  if self._oligo_lddt is None:
1062  LogInfo('Reference %s has: %s chains' \
1063  % (self.ref.GetName(), self.ref.chain_count))
1064  LogInfo('Model %s has: %s chains' \
1065  % (self.mdl.GetName(), self.mdl.chain_count))
1066 
1067  # score with or w/o extra-chain penalty
1068  if self.penalize_extra_chains:
1069  denominator = self.oligo_lddt_scorer.total_contacts
1070  denominator += self._GetModelPenalty()
1071  if denominator > 0:
1072  oligo_lddt = self.oligo_lddt_scorer.conserved_contacts \
1073  / float(denominator)
1074  else:
1075  oligo_lddt = 0.0
1076  else:
1077  oligo_lddt = self.oligo_lddt_scorer.global_score
1078  self._oligo_lddt = oligo_lddt
1079  return self._oligo_lddt
1080 
1081  @property
1082  def weighted_lddt(self):
1083  """Weighted average of single chain lDDT scores.
1084 
1085  The score is computed as a weighted average of single chain lDDT scores
1086  (see :attr:`sc_lddt_scorers`) using the total contacts of each single
1087  reference chain as weights. If :attr:`penalize_extra_chains` is True,
1088  unmapped chains are added with a 0 score and total contacts taken from
1089  the actual reference chains or (for unmapped model chains) using the
1090  :attr:`chem_mapping`.
1091 
1092  See :attr:`oligo_lddt` for a comparison of the two scores.
1093 
1094  :getter: Computed on first use (cached)
1095  :type: :class:`float`
1096  """
1097  if self._weighted_lddt is None:
1098  scores = [s.global_score for s in self.sc_lddt_scorers]
1099  weights = [s.total_contacts for s in self.sc_lddt_scorers]
1100  nominator = sum([s * w for s, w in zip(scores, weights)])
1101  if self.penalize_extra_chains:
1102  ref_scorers = self._GetRefScorers()
1103  denominator = sum(s.total_contacts for s in ref_scorers.values())
1104  denominator += self._GetModelPenalty()
1105  else:
1106  denominator = sum(weights)
1107  if denominator > 0:
1108  self._weighted_lddt = nominator / float(denominator)
1109  else:
1110  self._weighted_lddt = 0.0
1111  return self._weighted_lddt
1112 
1113  @property
1114  def lddt_ref(self):
1115  """The reference entity used for oligomeric lDDT scoring
1116  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1117 
1118  Since the lDDT computation requires a single chain with mapped residue
1119  numbering, all chains of :attr:`ref` are appended into a single chain X with
1120  unique residue numbers according to the column-index in the alignment. The
1121  alignments are in the same order as they appear in :attr:`alignments`.
1122  Additional residues are appended at the end of the chain with unique residue
1123  numbers. Unmapped chains are only added if :attr:`penalize_extra_chains` is
1124  True. Only CA atoms are considered if :attr:`calpha_only` is True.
1125 
1126  :getter: Computed on first use (cached)
1127  :type: :class:`~ost.mol.EntityHandle`
1128  """
1129  if self._lddt_ref is None:
1130  self._PrepareOligoEntities()
1131  return self._lddt_ref
1132 
1133  @property
1134  def lddt_mdl(self):
1135  """The model entity used for oligomeric lDDT scoring
1136  (:attr:`oligo_lddt` / :attr:`oligo_lddt_scorer`).
1137 
1138  Like :attr:`lddt_ref`, this is a single chain X containing all chains of
1139  :attr:`mdl`. The residue numbers match the ones in :attr:`lddt_ref` where
1140  aligned and have unique numbers for additional residues.
1141 
1142  :getter: Computed on first use (cached)
1143  :type: :class:`~ost.mol.EntityHandle`
1144  """
1145  if self._lddt_mdl is None:
1146  self._PrepareOligoEntities()
1147  return self._lddt_mdl
1148 
1149  @property
1151  """lDDT Scorer object for :attr:`lddt_ref` and :attr:`lddt_mdl`.
1152 
1153  :getter: Computed on first use (cached)
1154  :type: :class:`~ost.mol.alg.lDDTScorer`
1155  """
1156  if self._oligo_lddt_scorer is None:
1158  references=[self.lddt_ref.Select("")],
1159  model=self.lddt_mdl.Select(""),
1160  settings=self.settings)
1161  return self._oligo_lddt_scorer
1162 
1163  @property
1165  """List of scorer objects for each chain mapped in :attr:`alignments`.
1166 
1167  :getter: Computed on first use (cached)
1168  :type: :class:`list` of :class:`MappedLDDTScorer`
1169  """
1170  if self._mapped_lddt_scorers is None:
1171  self._mapped_lddt_scorers = list()
1172  for aln in self.alignments:
1173  mapped_lddt_scorer = MappedLDDTScorer(aln, self.calpha_only,
1174  self.settings)
1175  self._mapped_lddt_scorers.append(mapped_lddt_scorer)
1176  return self._mapped_lddt_scorers
1177 
1178  @property
1179  def sc_lddt_scorers(self):
1180  """List of lDDT scorer objects extracted from :attr:`mapped_lddt_scorers`.
1181 
1182  :type: :class:`list` of :class:`~ost.mol.alg.lDDTScorer`
1183  """
1184  return [mls.lddt_scorer for mls in self.mapped_lddt_scorers]
1185 
1186  @property
1187  def sc_lddt(self):
1188  """List of global scores extracted from :attr:`sc_lddt_scorers`.
1189 
1190  If scoring for a mapped chain fails, an error is displayed and a score of 0
1191  is assigned.
1192 
1193  :getter: Computed on first use (cached)
1194  :type: :class:`list` of :class:`float`
1195  """
1196  if self._sc_lddt is None:
1197  self._sc_lddt = list()
1198  for lddt_scorer in self.sc_lddt_scorers:
1199  try:
1200  self._sc_lddt.append(lddt_scorer.global_score)
1201  except Exception as ex:
1202  LogError('Single chain lDDT failed:', str(ex))
1203  self._sc_lddt.append(0.0)
1204  return self._sc_lddt
1205 
1206  ##############################################################################
1207  # Class internal helpers
1208  ##############################################################################
1209 
1210  def _PrepareOligoEntities(self):
1211  # simple wrapper to avoid code duplication
1212  self._lddt_ref, self._lddt_mdl = _MergeAlignedChains(
1213  self.alignments, self.ref, self.mdl, self.calpha_only,
1214  self.penalize_extra_chains)
1215 
1216  @staticmethod
1217  def _GetUnmappedMdlChains(mdl, alignments):
1218  # assume model is second sequence in alignment and is named by chain
1219  mdl_chains = set(ch.name for ch in mdl.chains)
1220  mapped_mdl_chains = set(aln.GetSequence(1).GetName() for aln in alignments)
1221  return (mdl_chains - mapped_mdl_chains)
1222 
1223  def _GetRefScorers(self):
1224  # single chain lddt scorers for each reference chain (key = chain name)
1225  if self._ref_scorers is None:
1226  # collect from mapped_lddt_scorers
1227  ref_scorers = dict()
1228  for mapped_lddt_scorer in self.mapped_lddt_scorers:
1229  ref_ch_name = mapped_lddt_scorer.reference_chain_name
1230  ref_scorers[ref_ch_name] = mapped_lddt_scorer.lddt_scorer
1231  # add new ones where needed
1232  for ch in self.ref.chains:
1233  if ch.name not in ref_scorers:
1234  if self.calpha_only:
1235  ref_chain = ch.Select('aname=CA')
1236  else:
1237  ref_chain = ch.Select('')
1238  ref_scorers[ch.name] = lDDTScorer(
1239  references=[ref_chain],
1240  model=ref_chain,
1241  settings=self.settings)
1242  # store in cache
1243  self._ref_scorers = ref_scorers
1244  # fetch from cache
1245  return self._ref_scorers
1246 
1247  def _GetModelPenalty(self):
1248  # extra value to add to total number of distances for extra model chains
1249  # -> estimated from chem-mapped reference chains
1250  if self._model_penalty is None:
1251  # sanity check
1252  if self.chem_mapping is None:
1253  raise RuntimeError("Must provide chem_mapping when requesting penalty "
1254  "for extra model chains!")
1255  # get cached ref_scorers
1256  ref_scorers = self._GetRefScorers()
1257  # get unmapped model chains
1258  unmapped_mdl_chains = self._GetUnmappedMdlChains(self.mdl, self.alignments)
1259  # map extra chains to ref. chains
1260  model_penalty = 0
1261  for ch_name_mdl in sorted(unmapped_mdl_chains):
1262  # get penalty for chain
1263  cur_penalty = None
1264  for cm_ref, cm_mdl in self.chem_mapping.iteritems():
1265  if ch_name_mdl in cm_mdl:
1266  # penalize by an average of the chem. mapped ref. chains
1267  cur_penalty = 0
1268  for ch_name_ref in cm_ref:
1269  # assumes that total_contacts is cached (for speed)
1270  cur_penalty += ref_scorers[ch_name_ref].total_contacts
1271  cur_penalty /= float(len(cm_ref))
1272  break
1273  # report penalty
1274  if cur_penalty is None:
1275  LogWarning('Extra MODEL chain %s could not be chemically mapped to '
1276  'any chain in REFERENCE, lDDT cannot consider it!' \
1277  % ch_name_mdl)
1278  else:
1279  LogScript('Extra MODEL chain %s added to lDDT score by considering '
1280  'chemically mapped chains in REFERENCE.' % ch_name_mdl)
1281  model_penalty += cur_penalty
1282  # store in cache
1283  self._model_penalty = model_penalty
1284  # fetch from cache
1285  return self._model_penalty
1286 
1287 
1288 class MappedLDDTScorer(object):
1289  """A simple class to calculate a single-chain lDDT score on a given chain to
1290  chain mapping as extracted from :class:`OligoLDDTScorer`.
1291 
1292  :param alignment: Sets :attr:`alignment`
1293  :param calpha_only: Sets :attr:`calpha_only`
1294  :param settings: Sets :attr:`settings`
1295 
1296  .. attribute:: alignment
1297 
1298  Alignment with two sequences named according to the mapped chains and with
1299  views attached to both sequences (e.g. one of the items of
1300  :attr:`QSscorer.alignments`).
1301 
1302  The first sequence is assumed to be the reference and the second one the
1303  model. Since the lDDT score is not symmetric (extra residues in model are
1304  ignored), the order is important.
1305 
1306  :type: :class:`~ost.seq.AlignmentHandle`
1307 
1308  .. attribute:: calpha_only
1309 
1310  If True, restricts lDDT score to CA only.
1311 
1312  :type: :class:`bool`
1313 
1314  .. attribute:: settings
1315 
1316  Settings to use for lDDT scoring.
1317 
1318  :type: :class:`~ost.mol.alg.lDDTSettings`
1319 
1320  .. attribute:: lddt_scorer
1321 
1322  lDDT Scorer object for the given chains.
1323 
1324  :type: :class:`~ost.mol.alg.lDDTScorer`
1325 
1326  .. attribute:: reference_chain_name
1327 
1328  Chain name of the reference.
1329 
1330  :type: :class:`str`
1331 
1332  .. attribute:: model_chain_name
1333 
1334  Chain name of the model.
1335 
1336  :type: :class:`str`
1337  """
1338  def __init__(self, alignment, calpha_only, settings):
1339  # prepare fields
1340  self.alignment = alignment
1341  self.calpha_only = calpha_only
1342  self.settings = settings
1343  self.lddt_scorer = None # set in _InitScorer
1344  self.reference_chain_name = alignment.sequences[0].name
1345  self.model_chain_name = alignment.sequences[1].name
1346  self._old_number_label = "old_num"
1347  self._extended_alignment = None # set in _InitScorer
1348  # initialize lDDT scorer
1349  self._InitScorer()
1350 
1352  """
1353  :return: Scores for each residue
1354  :rtype: :class:`list` of :class:`dict` with one item for each residue
1355  existing in model and reference:
1356 
1357  - "residue_number": Residue number in reference chain
1358  - "residue_name": Residue name in reference chain
1359  - "lddt": local lDDT
1360  - "conserved_contacts": number of conserved contacts
1361  - "total_contacts": total number of contacts
1362  """
1363  scores = list()
1364  assigned_residues = list()
1365  # Make sure the score is calculated
1366  self.lddt_scorer.global_score
1367  for col in self._extended_alignment:
1368  if col[0] != "-" and col.GetResidue(3).IsValid():
1369  ref_res = col.GetResidue(0)
1370  mdl_res = col.GetResidue(1)
1371  ref_res_renum = col.GetResidue(2)
1372  mdl_res_renum = col.GetResidue(3)
1373  if ref_res.one_letter_code != ref_res_renum.one_letter_code:
1374  raise RuntimeError("Reference residue name mapping inconsistent: %s != %s" %
1375  (ref_res.one_letter_code,
1376  ref_res_renum.one_letter_code))
1377  if mdl_res.one_letter_code != mdl_res_renum.one_letter_code:
1378  raise RuntimeError("Model residue name mapping inconsistent: %s != %s" %
1379  (mdl_res.one_letter_code,
1380  mdl_res_renum.one_letter_code))
1381  if ref_res.GetNumber().num != ref_res_renum.GetIntProp(self._old_number_label):
1382  raise RuntimeError("Reference residue number mapping inconsistent: %s != %s" %
1383  (ref_res.GetNumber().num,
1384  ref_res_renum.GetIntProp(self._old_number_label)))
1385  if mdl_res.GetNumber().num != mdl_res_renum.GetIntProp(self._old_number_label):
1386  raise RuntimeError("Model residue number mapping inconsistent: %s != %s" %
1387  (mdl_res.GetNumber().num,
1388  mdl_res_renum.GetIntProp(self._old_number_label)))
1389  if ref_res.qualified_name in assigned_residues:
1390  raise RuntimeError("Duplicated residue in reference: " %
1391  (ref_res.qualified_name))
1392  else:
1393  assigned_residues.append(ref_res.qualified_name)
1394  # check if property there (may be missing for CA-only)
1395  if mdl_res_renum.HasProp(self.settings.label):
1396  scores.append({
1397  "residue_number": ref_res.GetNumber().num,
1398  "residue_name": ref_res.name,
1399  "lddt": mdl_res_renum.GetFloatProp(self.settings.label),
1400  "conserved_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_conserved"),
1401  "total_contacts": mdl_res_renum.GetFloatProp(self.settings.label + "_total")})
1402  return scores
1403 
1404  ##############################################################################
1405  # Class internal helpers (anything that doesnt easily work without this class)
1406  ##############################################################################
1407 
1408  def _InitScorer(self):
1409  # Use copy of alignment (extended by 2 extra sequences for renumbering)
1410  aln = self.alignment.Copy()
1411  # Get chains and renumber according to alignment (for lDDT)
1412  reference = Renumber(
1413  aln.GetSequence(0),
1414  old_number_label=self._old_number_label).CreateFullView()
1415  refseq = seq.CreateSequence(
1416  "reference_renumbered",
1417  aln.GetSequence(0).GetString())
1418  refseq.AttachView(reference)
1419  aln.AddSequence(refseq)
1420  model = Renumber(
1421  aln.GetSequence(1),
1422  old_number_label=self._old_number_label).CreateFullView()
1423  modelseq = seq.CreateSequence(
1424  "model_renumbered",
1425  aln.GetSequence(1).GetString())
1426  modelseq.AttachView(model)
1427  aln.AddSequence(modelseq)
1428  # Filter to CA-only if desired (done after AttachView to not mess it up)
1429  if self.calpha_only:
1430  self.lddt_scorer = lDDTScorer(
1431  references=[reference.Select('aname=CA')],
1432  model=model.Select('aname=CA'),
1433  settings=self.settings)
1434  else:
1435  self.lddt_scorer = lDDTScorer(
1436  references=[reference],
1437  model=model,
1438  settings=self.settings)
1439  # Store alignment for later
1440  self._extended_alignment = aln
1441 
1442 ###############################################################################
1443 # HELPERS
1444 ###############################################################################
1445 
1446 # general
1447 
1448 def _AlignAtomSeqs(seq_1, seq_2):
1449  """
1450  :type seq_1: :class:`ost.seq.SequenceHandle`
1451  :type seq_2: :class:`ost.seq.SequenceHandle`
1452  :return: Alignment of two sequences using a global alignment. Views attached
1453  to the input sequences will remain attached in the aln.
1454  :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
1455  """
1456  # NOTE: If the two sequence have a greatly different length
1457  # a local alignment could be a better choice...
1458  aln = None
1459  alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
1460  if alns: aln = alns[0]
1461  if not aln:
1462  LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
1463  LogWarning('%s: %s' % (seq_1.name, seq_1.string))
1464  LogWarning('%s: %s' % (seq_2.name, seq_2.string))
1465  return aln
1466 
1467 def _FixSelectChainNames(ch_names):
1468  """
1469  :return: String to be used with Select(cname=<RETURN>). Takes care of joining
1470  and putting quotation marks where needed.
1471  :rtype: :class:`str`
1472  :param ch_names: Some iterable list of chain names (:class:`str` items).
1473  """
1474  return ','.join(mol.QueryQuoteName(ch_name) for ch_name in ch_names)
1475 
1476 # QS entity
1477 
1478 def _CleanInputEntity(ent):
1479  """
1480  :param ent: The OST entity to be cleaned.
1481  :type ent: :class:`EntityHandle` or :class:`EntityView`
1482  :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
1483  :attr:`QSscoreEntity.removed_chains`,
1484  :attr:`QSscoreEntity.calpha_only`
1485  """
1486  # find chains to remove
1487  removed_chains = []
1488  for ch in ent.chains:
1489  # we remove chains if they are small-peptides or if the contain no aa
1490  # or if they contain only unknown or modified residues
1491  if ch.name in ['-', '_'] \
1492  or ch.residue_count < 20 \
1493  or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
1494  or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
1495  removed_chains.append(ch.name)
1496 
1497  # remove them from *ent*
1498  if removed_chains:
1499  view = ent.Select('cname!=%s' % _FixSelectChainNames(set(removed_chains)))
1500  ent_new = mol.CreateEntityFromView(view, True)
1501  ent_new.SetName(ent.GetName())
1502  else:
1503  ent_new = ent
1504 
1505  # check if CA only
1506  calpha_only = False
1507  if ent_new.atom_count > 0 and ent_new.Select('aname=CB').atom_count == 0:
1508  LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
1509  calpha_only = True
1510 
1511  # report and return
1512  if removed_chains:
1513  LogInfo('Chains removed from %s: %s' \
1514  % (ent_new.GetName(), ''.join(removed_chains)))
1515  LogInfo('Chains in %s: %s' \
1516  % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
1517  return ent_new, removed_chains, calpha_only
1518 
1519 def _GetCAOnlyEntity(ent):
1520  """
1521  :param ent: Entity to process
1522  :type ent: :class:`EntityHandle` or :class:`EntityView`
1523  :return: New entity with only CA and only one atom per residue
1524  (see :attr:`QSscoreEntity.ca_entity`)
1525  """
1526  # cook up CA only view (diff from Select = guaranteed 1 atom per residue)
1527  ca_view = ent.CreateEmptyView()
1528  # add chain by chain
1529  for res in ent.residues:
1530  ca_atom = res.FindAtom("CA")
1531  if ca_atom.IsValid():
1532  ca_view.AddAtom(ca_atom)
1533  # finalize
1534  return mol.CreateEntityFromView(ca_view, False)
1535 
1536 def _GetChemGroups(qs_ent, seqid_thr=95.):
1537  """
1538  :return: Intra-complex group of chemically identical polypeptide chains
1539  (see :attr:`QSscoreEntity.chem_groups`)
1540 
1541  :param qs_ent: Entity to process
1542  :type qs_ent: :class:`QSscoreEntity`
1543  :param seqid_thr: Threshold used to decide when two chains are identical.
1544  95 percent tolerates the few mutations crystallographers
1545  like to do.
1546  :type seqid_thr: :class:`float`
1547  """
1548  # get data from qs_ent
1549  ca_chains = qs_ent.ca_chains
1550  chain_names = sorted(ca_chains.keys())
1551  # get pairs of identical chains
1552  # NOTE: this scales quadratically with number of chains and may be optimized
1553  # -> one could merge it with "merge transitive pairs" below...
1554  id_seqs = []
1555  for ch_1, ch_2 in itertools.combinations(chain_names, 2):
1556  aln = qs_ent.GetAlignment(ch_1, ch_2)
1557  if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
1558  id_seqs.append((ch_1, ch_2))
1559  # trivial case: no matching pairs
1560  if not id_seqs:
1561  return [[name] for name in chain_names]
1562 
1563  # merge transitive pairs
1564  groups = []
1565  for ch_1, ch_2 in id_seqs:
1566  found = False
1567  for g in groups:
1568  if ch_1 in g or ch_2 in g:
1569  found = True
1570  g.add(ch_1)
1571  g.add(ch_2)
1572  if not found:
1573  groups.append(set([ch_1, ch_2]))
1574  # sort internally based on sequence length
1575  chem_groups = []
1576  for g in groups:
1577  ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
1578  chem_groups.append([ch for _,ch in ranked_g])
1579  # add other dissimilar chains
1580  for ch in chain_names:
1581  if not any(ch in g for g in chem_groups):
1582  chem_groups.append([ch])
1583 
1584  return chem_groups
1585 
1586 def _GetAngles(Rt):
1587  """Computes the Euler angles given a transformation matrix.
1588 
1589  :param Rt: Rt operator.
1590  :type Rt: :class:`ost.geom.Mat4`
1591  :return: A :class:`tuple` of angles for each axis (x,y,z)
1592  """
1593  rot = np.asmatrix(Rt.ExtractRotation().data).reshape(3,3)
1594  tx = np.arctan2(rot[2,1], rot[2,2])
1595  if tx < 0:
1596  tx += 2*np.pi
1597  ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
1598  if ty < 0:
1599  ty += 2*np.pi
1600  tz = np.arctan2(rot[1,0], rot[0,0])
1601  if tz < 0:
1602  tz += 2*np.pi
1603  return tx,ty,tz
1604 
1605 # QS scorer
1606 
1607 def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
1608  """
1609  :return: Inter-complex mapping of chemical groups
1610  (see :attr:`QSscorer.chem_mapping`)
1611 
1612  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1613  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1614  """
1615  # get chem. groups and unique representative
1616  chem_groups_1 = qs_ent_1.chem_groups
1617  chem_groups_2 = qs_ent_2.chem_groups
1618  repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
1619  repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
1620 
1621  # if entities do not have different number of unique chains, we get the
1622  # mapping for the smaller set
1623  swapped = False
1624  if len(repr_chains_2) < len(repr_chains_1):
1625  repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
1626  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1627  swapped = True
1628 
1629  # find the closest to each chain between the two entities
1630  # NOTE: this may still be sensible to orthology problem
1631  # -> currently we use a global alignment and seq. id. to rank pairs
1632  # -> we also tried local alignments and weighting the seq. id. by the
1633  # coverages of the alignments (gapless string in aln. / seq. length)
1634  # but global aln performed better...
1635  chain_pairs = []
1636  ca_chains_1 = qs_ent_1.ca_chains
1637  ca_chains_2 = qs_ent_2.ca_chains
1638  for ch_1 in repr_chains_1.keys():
1639  for ch_2 in repr_chains_2.keys():
1640  aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
1641  if aln:
1642  chains_seqid = seq.alg.SequenceIdentity(aln)
1643  LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
1644  chain_pairs.append((chains_seqid, ch_1, ch_2))
1645 
1646  # get top matching groups first
1647  chain_pairs = sorted(chain_pairs, reverse=True)
1648  chem_mapping = {}
1649  for _, c1, c2 in chain_pairs:
1650  skip = False
1651  for a,b in chem_mapping.iteritems():
1652  if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
1653  skip = True
1654  break
1655  if not skip:
1656  chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
1657  if swapped:
1658  chem_mapping = {y: x for x, y in chem_mapping.iteritems()}
1659  qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
1660 
1661  # notify chains without partner
1662  mapped_1 = set([i for s in chem_mapping.keys() for i in s])
1663  chains_1 = set([c.name for c in qs_ent_1.ent.chains])
1664  if chains_1 - mapped_1:
1665  LogWarning('Unmapped Chains in %s: %s'
1666  % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
1667 
1668  mapped_2 = set([i for s in chem_mapping.values() for i in s])
1669  chains_2 = set([c.name for c in qs_ent_2.ent.chains])
1670  if chains_2 - mapped_2:
1671  LogWarning('Unmapped Chains in %s: %s'
1672  % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
1673 
1674  # check if we have any chains left
1675  LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
1676  if len(mapped_1) < 1 or len(mapped_2) < 1:
1677  raise QSscoreError('Less than 1 chains left in chem_mapping.')
1678  return chem_mapping
1679 
1680 def _SelectFew(l, max_elements):
1681  """Return l or copy of l with at most *max_elements* entries."""
1682  n_el = len(l)
1683  if n_el <= max_elements:
1684  return l
1685  else:
1686  # cheap integer ceiling (-1 to ensure that x*max_elements gets d_el = x)
1687  d_el = ((n_el - 1) // max_elements) + 1
1688  new_l = list()
1689  for i in range(0, n_el, d_el):
1690  new_l.append(l[i])
1691  return new_l
1692 
1693 def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
1694  clustalw_bin):
1695  """
1696  :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
1697  of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
1698  those views (see :attr:`QSscorer.ent_to_cm_1` and
1699  :attr:`QSscorer.ent_to_cm_2`)
1700 
1701  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1702  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1703  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1704  :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
1705  """
1706  # make sure name doesn't contain spaces and is unique
1707  def _FixName(seq_name, seq_names):
1708  # get rid of spaces and make it unique
1709  seq_name = seq_name.replace(' ', '-')
1710  while seq_name in seq_names:
1711  seq_name += '-'
1712  return seq_name
1713  # resulting views into CA entities using CA chain sequences
1714  ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
1715  ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
1716  ca_chains_1 = qs_ent_1.ca_chains
1717  ca_chains_2 = qs_ent_2.ca_chains
1718  # go through all mapped chemical groups
1719  for group_1, group_2 in chem_mapping.iteritems():
1720  # MSA with ClustalW
1721  seq_list = seq.CreateSequenceList()
1722  # keep sequence-name (must be unique) to view mapping
1723  seq_to_empty_view = dict()
1724  for ch in group_1:
1725  sequence = ca_chains_1[ch].Copy()
1726  sequence.name = _FixName(qs_ent_1.GetName() + '.' + ch, seq_to_empty_view)
1727  seq_to_empty_view[sequence.name] = ent_view_1
1728  seq_list.AddSequence(sequence)
1729  for ch in group_2:
1730  sequence = ca_chains_2[ch].Copy()
1731  sequence.name = _FixName(qs_ent_2.GetName() + '.' + ch, seq_to_empty_view)
1732  seq_to_empty_view[sequence.name] = ent_view_2
1733  seq_list.AddSequence(sequence)
1734  alnc = ClustalW(seq_list, clustalw=clustalw_bin)
1735 
1736  # collect aligned residues (list of lists of sequence_count valid residues)
1737  residue_lists = list()
1738  sequence_count = alnc.sequence_count
1739  for col in alnc:
1740  residues = [col.GetResidue(ir) for ir in range(sequence_count)]
1741  if all([r.IsValid() for r in residues]):
1742  residue_lists.append(residues)
1743  # check number of aligned residues
1744  if len(residue_lists) < 5:
1745  raise QSscoreError('Not enough aligned residues.')
1746  elif max_ca_per_chain > 0:
1747  residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
1748  # check what view is for which residue
1749  res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
1750  for ir in range(sequence_count)]
1751  # add to view (note: only 1 CA atom per residue in here)
1752  for res_list in residue_lists:
1753  for res, view in zip(res_list, res_views):
1754  view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
1755  # done
1756  return ent_view_1, ent_view_2
1757 
1758 
1759 def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
1760  """
1761  :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
1762  and :attr:`QSscorer.symm_2`) between the two structures.
1763  Empty lists if no symmetry identified.
1764 
1765  :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
1766  :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
1767  :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
1768  :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
1769  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1770  """
1771  LogInfo('Identifying Symmetry Groups...')
1772 
1773  # get possible symmetry groups
1774  symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
1775  chem_mapping.keys())
1776  symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
1777  chem_mapping.values())
1778 
1779  # check combination of groups
1780  LogInfo('Selecting Symmetry Groups...')
1781  # check how many mappings are to be checked for each pair of symmetry groups
1782  # -> lower number here will speed up worst-case runtime of _GetChainMapping
1783  # NOTE: this is tailored to speed up brute force all vs all mapping test
1784  # for preferred _CheckClosedSymmetry this is suboptimal!
1785  best_symm = []
1786  for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
1787  s1 = symm_1[0]
1788  s2 = symm_2[0]
1789  if len(s1) != len(s2):
1790  LogDebug('Discarded', str(symm_1), str(symm_2),
1791  ': different size of symmetry groups')
1792  continue
1793  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1794  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1795  LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
1796  best_symm.append((nr_mapp, symm_1, symm_2))
1797 
1798  for _, symm_1, symm_2 in sorted(best_symm):
1799  s1 = symm_1[0]
1800  s2 = symm_2[0]
1801  group_1 = ent_to_cm_1.Select('cname=%s' % _FixSelectChainNames(s1))
1802  group_2 = ent_to_cm_2.Select('cname=%s' % _FixSelectChainNames(s2))
1803  # check if by superposing a pair of chains within the symmetry group to
1804  # superpose all chains within the symmetry group
1805  # -> if successful, the symmetry groups are compatible
1806  closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
1807  chem_mapping, 6, 0.8, find_best=False)
1808 
1809  if closed_symm:
1810  return symm_1, symm_2
1811 
1812  # nothing found
1813  LogInfo('No coherent symmetry identified between structures')
1814  return [], []
1815 
1816 
1817 def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping,
1818  max_mappings_extensive):
1819  """
1820  :return: Tuple with mapping from *ent_1* to *ent_2* (see
1821  :attr:`QSscorer.chain_mapping`) and scheme used (see
1822  :attr:`QSscorer.chain_mapping_scheme`)
1823 
1824  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
1825  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
1826  :param symm_1: See :attr:`QSscorer.symm_1`
1827  :param symm_2: See :attr:`QSscorer.symm_2`
1828  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
1829  :param max_mappings_extensive: See :attr:`QSscorer.max_mappings_extensive`
1830  """
1831  LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
1832  LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
1833 
1834  # quick check for closed symmetries
1835  thresholds = [( 'strict', 4, 0.8),
1836  ( 'tolerant', 6, 0.4),
1837  ('permissive', 8, 0.2)]
1838  for scheme, sup_thr, sup_fract in thresholds:
1839  # check if by superposing a pair of chains within the symmetry group to
1840  # superpose all chains of the two oligomers
1841  # -> this also returns the resulting mapping of chains
1842  mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
1843  chem_mapping, sup_thr, sup_fract)
1844  if mapping:
1845  LogInfo('Closed Symmetry with %s parameters' % scheme)
1846  if scheme == 'permissive':
1847  LogWarning('Permissive thresholds used for overlap based mapping ' + \
1848  'detection: check mapping manually: %s' % mapping)
1849  return mapping, scheme
1850 
1851  # NOTE that what follows below is sub-optimal:
1852  # - if the two structures don't fit at all, we may map chains rather randomly
1853  # - we may need a lot of operations to find globally "best" mapping
1854  # -> COST = O(N^2) * O(N!)
1855  # (first = all pairwise chain-superpose, latter = all chain mappings)
1856  # -> a greedy chain mapping choice algo (pick best RMSD for each one-by-one)
1857  # could reduce this to O(N^2) * O(N^2) but it would need to be validated
1858  # - one could also try some other heuristic based on center of atoms of chains
1859  # -> at this point we get a bad mapping anyways...
1860 
1861  # if we get here, we will need to check many combinations of mappings
1862  # -> check how many mappings must be checked and limit
1863  count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
1864  LogInfo('Intra Symmetry-group mappings to check: %s' \
1865  % count['intra']['mappings'])
1866  LogInfo('Inter Symmetry-group mappings to check: %s' \
1867  % count['inter']['mappings'])
1868  nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
1869  if nr_mapp > max_mappings_extensive:
1870  raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
1871 
1872  # to speed up the computations we cache chain views and RMSDs
1873  cached_rmsd = _CachedRMSD(ent_1, ent_2)
1874 
1875  # get INTRA symmetry group chain mapping
1876  check = 0
1877  intra_mappings = [] # list of (RMSD, c1, c2, mapping) tuples (best superpose)
1878  # limit chem mapping to reference symmetry group
1879  ref_symm_1 = symm_1[0]
1880  ref_symm_2 = symm_2[0]
1881  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
1882  # for each chemically identical group
1883  for g1, g2 in asu_chem_mapping.iteritems():
1884  # try to superpose all possible pairs
1885  for c1, c2 in itertools.product(g1, g2):
1886  # get superposition transformation
1887  LogVerbose('Superposing chains: %s, %s' % (c1, c2))
1888  res = cached_rmsd.GetSuperposition(c1, c2)
1889  # compute RMSD of possible mappings
1890  cur_mappings = [] # list of (RMSD, mapping) tuples
1891  for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
1892  check += 1
1893  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1894  cur_mappings.append((chains_rmsd, mapping))
1895  LogVerbose(str(mapping), str(chains_rmsd))
1896  best_rmsd, best_mapp = min(cur_mappings)
1897  intra_mappings.append((best_rmsd, c1, c2, best_mapp))
1898  # best chain-chain superposition defines the intra asu mapping
1899  _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
1900 
1901  # if only one asu is present in any of the complexes, we're done
1902  if len(symm_1) == 1 or len(symm_2) == 1:
1903  mapping = intra_asu_mapp
1904  else:
1905  # the mapping is the element position within the asu chain list
1906  # -> this speed up a lot, assuming that the order of chain in asu groups
1907  # is following the order of symmetry operations
1908  index_asu_c1 = ref_symm_1.index(intra_asu_c1)
1909  index_asu_c2 = ref_symm_2.index(intra_asu_c2)
1910  index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
1911  for s1, s2 in intra_asu_mapp.iteritems()}
1912  LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
1913 
1914  # get INTER symmetry group chain mapping
1915  # we take the first symmetry group from the complex having the most
1916  if len(symm_1) < len(symm_2):
1917  symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
1918  else:
1919  symm_combinations = list(itertools.product([symm_1[0]], symm_2))
1920 
1921  full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
1922  full_mappings = [] # list of (RMSD, mapping) tuples
1923  for s1, s2 in symm_combinations:
1924  # get superposition transformation (we take best chain-pair in asu)
1925  LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
1926  % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
1927  res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
1928  # compute RMSD of possible mappings
1929  for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
1930  check += 1
1931  # need to extract full chain mapping (use indexing)
1932  mapping = {}
1933  for a, b in asu_mapp.iteritems():
1934  for id_a, id_b in index_mapp.iteritems():
1935  mapping[a[id_a]] = b[id_b]
1936  chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
1937  full_mappings.append((chains_rmsd, mapping))
1938  LogVerbose(str(mapping), str(chains_rmsd))
1939 
1940  if check != nr_mapp:
1941  LogWarning('Mapping number estimation was wrong') # sanity check
1942 
1943  # return best (lowest RMSD) mapping
1944  mapping = min(full_mappings)[1]
1945 
1946  LogWarning('Extensive search used for mapping detection (fallback). This ' + \
1947  'approach has known limitations. Check mapping manually: %s' \
1948  % mapping)
1949  return mapping, 'extensive'
1950 
1951 
1952 def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
1953  """
1954  Identify the symmetry (either cyclic C or dihedral D) of the protein and find
1955  all possible symmetry subgroups. This is done testing all combination of chain
1956  superposition and clustering them by the angles (D) or the axis (C) of the Rt
1957  operator.
1958 
1959  Groups of superposition which can fully reconstruct the structure are possible
1960  symmetry subgroups.
1961 
1962  :param qs_ent: Entity with cached angles and axis.
1963  :type qs_ent: :class:`QSscoreEntity`
1964  :param ent: Entity from which to extract chains (perfect alignment assumed
1965  for superposition as in :attr:`QSscorer.ent_to_cm_1`).
1966  :type ent: :class:`EntityHandle` or :class:`EntityView`
1967  :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
1968  contains all chains belonging to a chem. group (could be
1969  from :attr:`QSscoreEntity.chem_groups` or from "keys()"
1970  or "values()" of :attr:`QSscorer.chem_mapping`)
1971 
1972  :return: A list of possible symmetry subgroups (each in same format as
1973  :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
1974  with a single symmetry subgroup with a single group with all chains.
1975  """
1976  # get angles / axis for pairwise transformations within same chem. group
1977  angles = {}
1978  axis = {}
1979  for cnames in chem_groups:
1980  for c1, c2 in itertools.combinations(cnames, 2):
1981  cur_angles = qs_ent.GetAngles(c1, c2)
1982  if not np.any(np.isnan(cur_angles)):
1983  angles[(c1,c2)] = cur_angles
1984  cur_axis = qs_ent.GetAxis(c1, c2)
1985  if not np.any(np.isnan(cur_axis)):
1986  axis[(c1,c2)] = cur_axis
1987 
1988  # cluster superpositions angles at different thresholds
1989  symm_groups = []
1990  LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
1991  for angle_thr in np.arange(0.1, 1, 0.1):
1992  d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
1993  if d_symm_groups:
1994  for group in d_symm_groups:
1995  if group not in symm_groups:
1996  symm_groups.append(group)
1997  LogVerbose('Dihedral: %s' % group)
1998  LogInfo('Symmetry threshold %.1f used for angles of %s' \
1999  % (angle_thr, ent.GetName()))
2000  break
2001 
2002  # cluster superpositions axis at different thresholds
2003  for axis_thr in np.arange(0.1, 1, 0.1):
2004  c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
2005  if c_symm_groups:
2006  for group in c_symm_groups:
2007  if group not in symm_groups:
2008  symm_groups.append(group)
2009  LogVerbose('Cyclic: %s' % group)
2010  LogInfo('Symmetry threshold %.1f used for axis of %s' \
2011  % (axis_thr, ent.GetName()))
2012  break
2013 
2014  # fall back to single "group" containing all chains if none found
2015  if not symm_groups:
2016  LogInfo('No symmetry-group detected in %s' % ent.GetName())
2017  symm_groups = [[tuple([c for g in chem_groups for c in g])]]
2018 
2019  return symm_groups
2020 
2021 def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
2022  """
2023  :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
2024  (same return type as there). Empty list if fail.
2025 
2026  :param ent: See :func:`_GetSymmetrySubgroups`
2027  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2028  :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
2029  :param angle_thr: Euler angles distance threshold for clustering (float).
2030  """
2031  # cluster superpositions angles
2032  if len(angles) == 0:
2033  # nothing to be done here
2034  return []
2035  else:
2036  same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
2037 
2038  # get those which are non redundant and covering all chains
2039  symm_groups = []
2040  for clst in same_angles.values():
2041  group = clst.keys()
2042  if _ValidChainGroup(group, ent):
2043  if len(chem_groups) > 1:
2044  # if hetero, we want to group toghether different chains only
2045  symm_groups.append(zip(*group))
2046  else:
2047  # if homo, we also want pairs
2048  symm_groups.append(group)
2049  symm_groups.append(zip(*group))
2050  return symm_groups
2051 
2052 def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
2053  """
2054  :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
2055  (same return type as there). Empty list if fail.
2056 
2057  :param ent: See :func:`_GetSymmetrySubgroups`
2058  :param chem_groups: See :func:`_GetSymmetrySubgroups`
2059  :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
2060  :param angle_thr: Axis distance threshold for clustering (float).
2061  """
2062  # cluster superpositions axis
2063  if len(axis) == 0:
2064  # nothing to be done here
2065  return []
2066  else:
2067  same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
2068 
2069  # use to get grouping
2070  symm_groups = []
2071  for clst in same_axis.values():
2072  all_chain = [item for sublist in clst.keys() for item in sublist]
2073  if len(set(all_chain)) == ent.chain_count:
2074  # if we have an hetero we can exploit cyclic symmetry for grouping
2075  if len(chem_groups) > 1:
2076  ref_chem_group = chem_groups[0]
2077  # try two criteria for grouping
2078  cyclic_group_closest = []
2079  cyclic_group_iface = []
2080  for c1 in ref_chem_group:
2081  group_closest = [c1]
2082  group_iface = [c1]
2083  for chains in chem_groups[1:]:
2084  # center of atoms distance
2085  closest = _GetClosestChain(ent, c1, chains)
2086  # chain with bigger interface
2087  closest_iface = _GetClosestChainInterface(ent, c1, chains)
2088  group_closest.append(closest)
2089  group_iface.append(closest_iface)
2090  cyclic_group_closest.append(tuple(group_closest))
2091  cyclic_group_iface.append(tuple(group_iface))
2092  if _ValidChainGroup(cyclic_group_closest, ent):
2093  symm_groups.append(cyclic_group_closest)
2094  if _ValidChainGroup(cyclic_group_iface, ent):
2095  symm_groups.append(cyclic_group_iface)
2096  # if it is a homo, there's not much we can group
2097  else:
2098  symm_groups.append(chem_groups)
2099  return symm_groups
2100 
2101 def _ClusterData(data, thr, metric):
2102  """Wrapper for fclusterdata to get dict of clusters.
2103 
2104  :param data: :class:`dict` (keys for ID, values used for clustering)
2105  :return: :class:`dict` {cluster_idx: {data-key: data-value}}
2106  """
2107  # special case length 1
2108  if len(data) == 1:
2109  return {0: {data.keys()[0]: data.values()[0]} }
2110  # do the clustering
2111  cluster_indices = fclusterdata(np.asarray(data.values()), thr,
2112  method='complete', criterion='distance',
2113  metric=metric)
2114  # fclusterdata output is cluster ids -> put into dict of clusters
2115  res = {}
2116  for cluster_idx, data_key in zip(cluster_indices, data.keys()):
2117  if not cluster_idx in res:
2118  res[cluster_idx] = {}
2119  res[cluster_idx][data_key] = data[data_key]
2120  return res
2121 
2122 def _AngleArrayDistance(u, v):
2123  """
2124  :return: Average angular distance of two arrays of angles.
2125  :param u: Euler angles.
2126  :param v: Euler angles.
2127  """
2128  dist = []
2129  for a,b in zip(u,v):
2130  delta = abs(a-b)
2131  if delta > np.pi:
2132  delta = abs(2*np.pi - delta)
2133  dist.append(delta)
2134  return np.mean(dist)
2135 
2136 def _AxisDistance(u, v):
2137  """
2138  :return: Euclidean distance between two rotation axes. Axes can point in
2139  either direction so we ensure to use the closer one.
2140  :param u: Rotation axis.
2141  :param v: Rotation axis.
2142  """
2143  # get axes as arrays (for convenience)
2144  u = np.asarray(u)
2145  v = np.asarray(v)
2146  # get shorter of two possible distances (using v or -v)
2147  dv1 = u - v
2148  dv2 = u + v
2149  d1 = np.dot(dv1, dv1)
2150  d2 = np.dot(dv2, dv2)
2151  return np.sqrt(min(d1, d2))
2152 
2153 def _GetClosestChain(ent, ref_chain, chains):
2154  """
2155  :return: Chain closest to *ref_chain* based on center of atoms distance.
2156  :rtype: :class:`str`
2157  :param ent: See :func:`_GetSymmetrySubgroups`
2158  :param ref_chain: We look for chains closest to this one
2159  :type ref_chain: :class:`str`
2160  :param chains: We only consider these chains
2161  :type chains: :class:`list` of :class:`str`
2162  """
2163  contacts = []
2164  ref_center = ent.FindChain(ref_chain).center_of_atoms
2165  for ch in chains:
2166  center = ent.FindChain(ch).center_of_atoms
2167  contacts.append((geom.Distance(ref_center, center), ch))
2168  closest_chain = min(contacts)[1]
2169  return closest_chain
2170 
2171 def _GetClosestChainInterface(ent, ref_chain, chains):
2172  """
2173  :return: Chain with biggest interface (within 10 A) to *ref_chain*.
2174  :rtype: :class:`str`
2175  :param ent: See :func:`_GetSymmetrySubgroups`
2176  :param ref_chain: We look for chains closest to this one
2177  :type ref_chain: :class:`str`
2178  :param chains: We only consider these chains
2179  :type chains: :class:`list` of :class:`str`
2180  """
2181  # NOTE: this is computed on a subset of the full biounit and might be
2182  # inaccurate. Also it could be extracted from QSscoreEntity.contacts.
2183  closest = []
2184  for ch in chains:
2185  iface_view = ent.Select('cname="%s" and 10 <> [cname="%s"]' \
2186  % (ref_chain, ch))
2187  nr_res = iface_view.residue_count
2188  closest.append((nr_res, ch))
2189  closest_chain = max(closest)[1]
2190  return closest_chain
2191 
2192 def _ValidChainGroup(group, ent):
2193  """
2194  :return: True, if *group* has unique chain names and as many chains as *ent*
2195  :rtype: :class:`bool`
2196  :param group: Symmetry groups to check
2197  :type group: Same as :attr:`QSscorer.symm_1`
2198  :param ent: See :func:`_GetSymmetrySubgroups`
2199  """
2200  all_chain = [item for sublist in group for item in sublist]
2201  if len(all_chain) == len(set(all_chain)) and\
2202  len(all_chain) == ent.chain_count:
2203  return True
2204  return False
2205 
2206 def _LimitChemMapping(chem_mapping, limit_1, limit_2):
2207  """
2208  :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
2209  :rtype: Same as :attr:`QSscorer.chem_mapping`
2210  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2211  :param limit_1: Limits chain names in chem_mapping.keys()
2212  :type limit_1: List/tuple of strings
2213  :param limit_2: Limits chain names in chem_mapping.values()
2214  :type limit_2: List/tuple of strings
2215  """
2216  # use set intersection to keep only mapping for chains in limit_X
2217  limit_1_set = set(limit_1)
2218  limit_2_set = set(limit_2)
2219  asu_chem_mapping = dict()
2220  for key, value in chem_mapping.iteritems():
2221  new_key = tuple(set(key) & limit_1_set)
2222  if new_key:
2223  asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
2224  return asu_chem_mapping
2225 
2226 
2227 def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
2228  """
2229  :return: Dictionary of number of mappings and superpositions to be performed.
2230  Returned as *result[X][Y] = number* with X = "intra" or "inter" and
2231  Y = "mappings" or "superpositions". The idea is that for each
2232  pairwise superposition we check all possible mappings.
2233  We can check the combinations within (intra) a symmetry group and
2234  once established, we check the combinations between different (inter)
2235  symmetry groups.
2236  :param symm_1: See :attr:`QSscorer.symm_1`
2237  :param symm_2: See :attr:`QSscorer.symm_2`
2238  :param chem_mapping: See :attr:`QSscorer.chem_mapping`
2239  """
2240  # setup
2241  c = {}
2242  c['intra'] = {}
2243  c['inter'] = {}
2244  c['intra']['mappings'] = 0
2245  c['intra']['superpositions'] = 0
2246  c['inter']['mappings'] = 0
2247  c['inter']['superpositions'] = 0
2248  # intra ASU mappings within reference symmetry groups
2249  ref_symm_1 = symm_1[0]
2250  ref_symm_2 = symm_2[0]
2251  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2252  for g1, g2 in asu_chem_mapping.iteritems():
2253  chain_superpositions = len(g1) * len(g2)
2254  c['intra']['superpositions'] += chain_superpositions
2255  map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
2256  c['intra']['mappings'] += chain_superpositions * map_per_sup
2257  if len(symm_1) == 1 or len(symm_2) == 1:
2258  return c
2259  # inter ASU mappings
2260  asu_superposition = min(len(symm_1), len(symm_2))
2261  c['inter']['superpositions'] = asu_superposition
2262  asu = {tuple(symm_1): tuple(symm_2)}
2263  map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
2264  c['inter']['mappings'] = asu_superposition * map_per_sup
2265  return c
2266 
2267 def _PermutationOrCombinations(sup1, sup2, chem_mapping):
2268  """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
2269  # remove superposed elements, put smallest complex as key
2270  chem_map = {}
2271  for a,b in chem_mapping.iteritems():
2272  new_a = tuple([x for x in a if x != sup1])
2273  new_b = tuple([x for x in b if x != sup2])
2274  chem_map[new_a] = new_b
2275  mapp_nr = 1.0
2276  for a, b in chem_map.iteritems():
2277  if len(a) == len(b):
2278  mapp_nr *= factorial(len(a))
2279  elif len(a) < len(b):
2280  mapp_nr *= binom(len(b), len(a))
2281  elif len(a) > len(b):
2282  mapp_nr *= binom(len(a), len(b))
2283  return int(mapp_nr)
2284 
2285 def _ListPossibleMappings(sup1, sup2, chem_mapping):
2286  """
2287  Return a flat list of all possible mappings given *chem_mapping* and keeping
2288  mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
2289  this is all the mappings to check for a given superposition.
2290 
2291  Elements in first complex are defined by *chem_mapping.keys()* (list of list
2292  of elements) and elements in second complex by *chem_mapping.values()*. If
2293  complexes don't have same number of elements, we map only elements for the one
2294  with less. Also mapping is only between elements of mapped groups according to
2295  *chem_mapping*.
2296 
2297  :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
2298  value = element in chem_mapping-value)
2299  :param sup1: Element for which mapping is fixed.
2300  :type sup1: Like element in chem_mapping-key
2301  :param sup2: Element for which mapping is fixed.
2302  :type sup2: Like element in chem_mapping-value
2303  :param chem_mapping: Defines mapping between groups of elements (e.g. result
2304  of :func:`_LimitChemMapping`).
2305  :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
2306 
2307  :raises: :class:`QSscoreError` if reference complex (first one or one with
2308  less elements) has more elements for any given mapped group.
2309  """
2310  # find smallest complex
2311  chain1 = [i for s in chem_mapping.keys() for i in s]
2312  chain2 = [i for s in chem_mapping.values() for i in s]
2313  swap = False
2314  if len(chain1) > len(chain2):
2315  swap = True
2316  # remove superposed elements, put smallest complex as key
2317  chem_map = {}
2318  for a, b in chem_mapping.iteritems():
2319  new_a = tuple([x for x in a if x != sup1])
2320  new_b = tuple([x for x in b if x != sup2])
2321  if swap:
2322  chem_map[new_b] = new_a
2323  else:
2324  chem_map[new_a] = new_b
2325  # generate permutations or combinations of chemically
2326  # equivalent chains
2327  chem_perm = []
2328  chem_ref = []
2329  for a, b in chem_map.iteritems():
2330  if len(a) == len(b):
2331  chem_perm.append(list(itertools.permutations(b)))
2332  chem_ref.append(a)
2333  elif len(a) < len(b):
2334  chem_perm.append(list(itertools.combinations(b, len(a))))
2335  chem_ref.append(a)
2336  else:
2337  raise QSscoreError('Impossible to define reference group: %s' \
2338  % str(chem_map))
2339  # save the list of possible mappings
2340  mappings = []
2341  flat_ref = [i for s in chem_ref for i in s]
2342  for perm in itertools.product(*chem_perm):
2343  flat_perm = [i for s in perm for i in s]
2344  d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
2345  if swap:
2346  d = {v: k for k, v in d.items()}
2347  d.update({sup1: sup2})
2348  mappings.append(d)
2349  return mappings
2350 
2351 
2352 def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
2353  sup_thr=4, sup_fract=0.8, find_best=True):
2354  """
2355  Quick check if we can superpose two chains and get a mapping for all other
2356  chains using the same transformation. The mapping is defined by sufficient
2357  overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
2358 
2359  :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
2360  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
2361  Views are ok but only to select full chains!
2362  :param ent_2: Entity to map to (perfect alignment assumed between
2363  chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
2364  Views are ok but only to select full chains!
2365  :param symm_1: Symmetry groups to use. We only superpose chains within
2366  reference symmetry group of *symm_1* and *symm_2*.
2367  See :attr:`QSscorer.symm_1`
2368  :param symm_2: See :attr:`QSscorer.symm_2`
2369  :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
2370  All chains in *ent_1* / *ent_2* must be listed here!
2371  :param sup_thr: Distance around transformed chain in *ent_1* to check for
2372  overlap.
2373  :type sup_thr: :class:`float`
2374  :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
2375  overlapped for overlap to be sufficient.
2376  :type sup_fract: :class:`float`
2377  :param find_best: If True, we look for best mapping according to
2378  :func:`_GetMappedRMSD`. Otherwise, we return first suitable
2379  mapping.
2380  :type find_best: :class:`bool`
2381 
2382  :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
2383  found, is as in :attr:`QSscorer.chain_mapping`.
2384  :rtype: :class:`dict` (:class:`str` / :class:`str`)
2385  """
2386  # limit chem mapping to reference symmetry group
2387  ref_symm_1 = symm_1[0]
2388  ref_symm_2 = symm_2[0]
2389  asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
2390  # for each chemically identical group
2391  rmsd_mappings = []
2392  for g1, g2 in asu_chem_mapping.iteritems():
2393  # try to superpose all possible pairs
2394  # -> note that some chain-chain combinations may work better than others
2395  # to superpose the full oligomer (e.g. if some chains are open/closed)
2396  for c1, c2 in itertools.product(g1, g2):
2397  # get superposition transformation
2398  chain_1 = ent_1.Select('cname="%s"' % c1)
2399  chain_2 = ent_2.Select('cname="%s"' % c2)
2400  res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2401  # look for overlaps
2402  mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
2403  res.transformation, sup_thr,
2404  sup_fract)
2405  if not mapping:
2406  continue
2407  # early abort if we only want the first one
2408  if not find_best:
2409  return mapping
2410  else:
2411  # get RMSD for sorting
2412  rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
2413  rmsd_mappings.append((rmsd, mapping))
2414  # return best mapping
2415  if rmsd_mappings:
2416  return min(rmsd_mappings)[1]
2417  else:
2418  return None
2419 
2420 def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
2421  sup_thr, sup_fract):
2422  """
2423  :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
2424  (see :func:`_CheckClosedSymmetry`).
2425  :param ent_1: See :func:`_CheckClosedSymmetry`
2426  :param ent_2: See :func:`_CheckClosedSymmetry`
2427  :param chem_mapping: See :func:`_CheckClosedSymmetry`
2428  :param transformation: Superposition transformation to be applied to *ent_1*.
2429  :param sup_thr: See :func:`_CheckClosedSymmetry`
2430  :param sup_fract: See :func:`_CheckClosedSymmetry`
2431  """
2432  # NOTE: this seems to be the comput. most expensive part in QS scoring
2433  # -> it could be moved to C++ for speed-up
2434  # -> the next expensive bits are ClustalW and GetContacts btw...
2435 
2436  # swap if needed (ent_1 must be shorter or same)
2437  if ent_1.chain_count > ent_2.chain_count:
2438  swap = True
2439  ent_1, ent_2 = ent_2, ent_1
2440  transformation = geom.Invert(transformation)
2441  chem_pairs = zip(chem_mapping.values(), chem_mapping.keys())
2442  else:
2443  swap = False
2444  chem_pairs = chem_mapping.iteritems()
2445  # sanity check
2446  if ent_1.chain_count == 0:
2447  return None
2448  # extract valid partners for each chain in ent_1
2449  chem_partners = dict()
2450  for cg1, cg2 in chem_pairs:
2451  for ch in cg1:
2452  chem_partners[ch] = set(cg2)
2453  # get mapping for each chain in ent_1
2454  mapping = dict()
2455  mapped_chains = set()
2456  for ch_1 in ent_1.chains:
2457  # get all neighbors split by chain (NOTE: this muight be moved to C++)
2458  ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
2459  for a_1 in ch_1.handle.atoms:
2460  transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
2461  a_within = ent_2.FindWithin(transformed_pos, sup_thr)
2462  for a_2 in a_within:
2463  ch_atoms[a_2.chain.name].add(a_2.hash_code)
2464  # get one with most atoms in overlap
2465  max_num, max_name = max((len(atoms), name)
2466  for name, atoms in ch_atoms.iteritems())
2467  # early abort if overlap fraction not good enough
2468  ch_2 = ent_2.FindChain(max_name)
2469  if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
2470  return None
2471  # early abort if mapped to chain of different chem. group
2472  ch_1_name = ch_1.name
2473  if ch_1_name not in chem_partners:
2474  raise RuntimeError("Chem. mapping doesn't contain all chains!")
2475  if max_name not in chem_partners[ch_1_name]:
2476  return None
2477  # early abort if multiple ones mapped to same chain
2478  if max_name in mapped_chains:
2479  return None
2480  # set mapping
2481  mapping[ch_1_name] = max_name
2482  mapped_chains.add(max_name)
2483  # unswap if needed and return
2484  if swap:
2485  mapping = {v: k for k, v in mapping.iteritems()}
2486  return mapping
2487 
2488 def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
2489  """
2490  :return: RMSD between complexes considering chain mapping.
2491  :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
2492  mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
2493  :param ent_2: Entity which was mapped to (perfect alignment assumed between
2494  mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
2495  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2496  :param transformation: Superposition transformation to be applied to *ent_1*.
2497  """
2498  # collect RMSDs and atom counts chain by chain and combine afterwards
2499  rmsds = []
2500  atoms = []
2501  for c1, c2 in chain_mapping.iteritems():
2502  # get views and atom counts
2503  chain_1 = ent_1.Select('cname="%s"' % c1)
2504  chain_2 = ent_2.Select('cname="%s"' % c2)
2505  atom_count = chain_1.atom_count
2506  if atom_count != chain_2.atom_count:
2507  raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
2508  # get RMSD
2509  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2510  # update lists
2511  rmsds.append(rmsd)
2512  atoms.append(float(atom_count))
2513  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2514  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2515  return rmsd
2516 
2518  """Helper object for repetitive RMSD calculations.
2519  Meant to speed up :func:`_GetChainMapping` but could also be used to replace
2520  :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
2521 
2522  :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
2523  :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
2524  """
2525  def __init__(self, ent_1, ent_2):
2526  # initialize caches and keep entities
2527  self.ent_1 = ent_1
2528  self.ent_2 = ent_2
2529  self._chain_views_1 = dict()
2530  self._chain_views_2 = dict()
2531  self._pair_rmsd = dict() # key = (c1,c2), value = (atom_count,rmsd)
2532 
2533  def GetChainView1(self, cname):
2534  """Get cached view on chain *cname* for :attr:`ent_1`."""
2535  if cname not in self._chain_views_1:
2536  self._chain_views_1[cname] = self.ent_1.Select('cname="%s"' % cname)
2537  return self._chain_views_1[cname]
2538 
2539  def GetChainView2(self, cname):
2540  """Get cached view on chain *cname* for :attr:`ent_2`."""
2541  if cname not in self._chain_views_2:
2542  self._chain_views_2[cname] = self.ent_2.Select('cname="%s"' % cname)
2543  return self._chain_views_2[cname]
2544 
2545  def GetSuperposition(self, c1, c2):
2546  """Get superposition result (no change in entities!) for *c1* to *c2*.
2547  This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
2548 
2549  :param c1: chain name for :attr:`ent_1`.
2550  :param c2: chain name for :attr:`ent_2`.
2551  """
2552  # invalidate _pair_rmsd
2553  self._pair_rmsd = dict()
2554  # superpose
2555  chain_1 = self.GetChainView1(c1)
2556  chain_2 = self.GetChainView2(c2)
2557  if chain_1.atom_count != chain_2.atom_count:
2558  raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
2559  return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
2560 
2561  def GetMappedRMSD(self, chain_mapping, transformation):
2562  """
2563  :return: RMSD between complexes considering chain mapping.
2564  :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
2565  :param transformation: Superposition transformation (e.g. res.transformation
2566  for res = :func:`GetSuperposition`).
2567  """
2568  # collect RMSDs and atom counts chain by chain and combine afterwards
2569  rmsds = []
2570  atoms = []
2571  for c1, c2 in chain_mapping.iteritems():
2572  # cached?
2573  if (c1, c2) in self._pair_rmsd:
2574  atom_count, rmsd = self._pair_rmsd[(c1, c2)]
2575  else:
2576  # get views and atom counts
2577  chain_1 = self.GetChainView1(c1)
2578  chain_2 = self.GetChainView2(c2)
2579  atom_count = chain_1.atom_count
2580  if atom_count != chain_2.atom_count:
2581  raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
2582  # get RMSD
2583  rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
2584  self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
2585  # update lists
2586  rmsds.append(rmsd)
2587  atoms.append(float(atom_count))
2588  # combine (reminder: RMSD = sqrt(sum(atom_distance^2)/atom_count))
2589  rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
2590  return rmsd
2591 
2592 
2593 def _CleanUserSymmetry(symm, ent):
2594  """Clean-up user provided symmetry.
2595 
2596  :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
2597  :param ent: Entity from which to extract chain names
2598  :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
2599  :return: New symmetry group limited to chains in sub-structure *ent*
2600  (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
2601  """
2602  # restrict symm to only contain chains in ent
2603  chain_names = set([ch.name for ch in ent.chains])
2604  new_symm = list()
2605  for symm_group in symm:
2606  new_group = tuple(ch for ch in symm_group if ch in chain_names)
2607  if new_group:
2608  new_symm.append(new_group)
2609  # report it
2610  if new_symm != symm:
2611  LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
2612  # do all groups have same length?
2613  lengths = [len(symm_group) for symm_group in new_symm]
2614  if lengths[1:] != lengths[:-1]:
2615  LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
2616  % (ent.GetName()))
2617  return []
2618  # do we cover all chains?
2619  s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
2620  if len(s_chain_names) != sum(lengths):
2621  LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
2622  % (ent.GetName()))
2623  return []
2624  if s_chain_names != chain_names:
2625  LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
2626  % (ent.GetName()))
2627  return []
2628  # ok all good
2629  return new_symm
2630 
2631 def _AreValidSymmetries(symm_1, symm_2):
2632  """Check symmetry pair for major problems.
2633 
2634  :return: False if any of the two is empty or if they're compatible in size.
2635  :rtype: :class:`bool`
2636  """
2637  if not symm_1 or not symm_2:
2638  return False
2639  if len(symm_1) != 1 or len(symm_2) != 1:
2640  if not len(symm_1[0]) == len(symm_2[0]):
2641  LogWarning('Symmetry groups of different sizes. Ignoring them!')
2642  return False
2643  return True
2644 
2645 def _GetMappedAlignments(ent_1, ent_2, chain_mapping, res_num_alignment):
2646  """
2647  :return: Alignments of 2 structures given chain mapping
2648  (see :attr:`QSscorer.alignments`).
2649  :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
2650  Views to this entity attached to first sequence of each aln.
2651  :param ent_2: Entity containing all chains in *chain_mapping.values()*.
2652  Views to this entity attached to second sequence of each aln.
2653  :param chain_mapping: See :attr:`QSscorer.chain_mapping`
2654  :param res_num_alignment: See :attr:`QSscorer.res_num_alignment`
2655  """
2656  alns = list()
2657  for ch_1_name in sorted(chain_mapping):
2658  # get both sequences incl. attached view
2659  ch_1 = ent_1.FindChain(ch_1_name)
2660  ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
2661  if res_num_alignment:
2662  max_res_num = max([r.number.GetNum() for r in ch_1.residues] +
2663  [r.number.GetNum() for r in ch_2.residues])
2664  ch1_aln = ["-"] * max_res_num
2665  ch2_aln = ["-"] * max_res_num
2666  for res in ch_1.residues:
2667  ch1_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2668  ch1_aln = "".join(ch1_aln)
2669  seq_1 = seq.CreateSequence(ch_1.name, str(ch1_aln))
2670  seq_1.AttachView(ch_1.Select(""))
2671  for res in ch_2.residues:
2672  ch2_aln[res.number.GetNum() - 1] = res.GetOneLetterCode()
2673  ch2_aln = "".join(ch2_aln)
2674  seq_2 = seq.CreateSequence(ch_2.name, str(ch2_aln))
2675  seq_2.AttachView(ch_2.Select(""))
2676  # Create alignment
2677  aln = seq.CreateAlignment()
2678  aln.AddSequence(seq_1)
2679  aln.AddSequence(seq_2)
2680  else:
2681  seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
2682  seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
2683  # align them
2684  aln = _AlignAtomSeqs(seq_1, seq_2)
2685  if aln:
2686  alns.append(aln)
2687  return alns
2688 
2689 def _GetMappedResidues(alns):
2690  """
2691  :return: Mapping of shared residues in *alns* (with views attached)
2692  (see :attr:`QSscorer.mapped_residues`).
2693  :param alns: See :attr:`QSscorer.alignments`
2694  """
2695  mapped_residues = dict()
2696  for aln in alns:
2697  # prepare dict for c1
2698  c1 = aln.GetSequence(0).name
2699  mapped_residues[c1] = dict()
2700  # get shared residues
2701  v1, v2 = seq.ViewsFromAlignment(aln)
2702  for res_1, res_2 in zip(v1.residues, v2.residues):
2703  r1 = res_1.number.num
2704  r2 = res_2.number.num
2705  mapped_residues[c1][r1] = r2
2706 
2707  return mapped_residues
2708 
2709 def _GetExtraWeights(contacts, done, mapped_residues):
2710  """Return sum of extra weights for contacts of chains in set and not in done.
2711  :return: Tuple (weight_extra_mapped, weight_extra_all).
2712  weight_extra_mapped only sums if both cX,rX in mapped_residues
2713  weight_extra_all sums all
2714  :param contacts: See :func:`GetContacts` for first entity
2715  :param done: List of (c1, c2, r1, r2) tuples to ignore
2716  :param mapped_residues: See :func:`_GetMappedResidues`
2717  """
2718  weight_extra_mapped = 0
2719  weight_extra_non_mapped = 0
2720  for c1 in contacts:
2721  for c2 in contacts[c1]:
2722  for r1 in contacts[c1][c2]:
2723  for r2 in contacts[c1][c2][r1]:
2724  if (c1, c2, r1, r2) not in done:
2725  weight = _weight(contacts[c1][c2][r1][r2])
2726  if c1 in mapped_residues and r1 in mapped_residues[c1] \
2727  and c2 in mapped_residues and r2 in mapped_residues[c2]:
2728  weight_extra_mapped += weight
2729  else:
2730  weight_extra_non_mapped += weight
2731  return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
2732 
2733 def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
2734  """Get QS scores (see :class:`QSscorer`).
2735 
2736  Note that if some chains are not to be considered at all, they must be removed
2737  from *contacts_1* / *contacts_2* prior to calling this.
2738 
2739  :param contacts_1: See :func:`GetContacts` for first entity
2740  :param contacts_2: See :func:`GetContacts` for second entity
2741  :param mapped_residues: See :func:`_GetMappedResidues`
2742  :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
2743  for *contacts_2*.
2744  :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
2745  :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
2746  see :attr:`QSscorer.global_score`)
2747  """
2748  # keep track of considered contacts as set of (c1,c2,r1,r2) tuples
2749  done_1 = set()
2750  done_2 = set()
2751  weighted_scores = 0
2752  weight_sum = 0
2753  # naming cXY, rXY: X = 1/2 for contact, Y = 1/2/T for mapping (T = tmp)
2754  # -> d1 = contacts_1[c11][c21][r11][r21], d2 = contacts_2[c12][c22][r12][r22]
2755  for c11 in contacts_1:
2756  # map to other one
2757  if c11 not in mapped_residues: continue
2758  c1T = chain_mapping[c11]
2759  # second chain
2760  for c21 in contacts_1[c11]:
2761  # map to other one
2762  if c21 not in mapped_residues: continue
2763  c2T = chain_mapping[c21]
2764  # flip if needed
2765  flipped_chains = (c1T > c2T)
2766  if flipped_chains:
2767  c12, c22 = c2T, c1T
2768  else:
2769  c12, c22 = c1T, c2T
2770  # skip early if no contacts there
2771  if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
2772  # loop over res.num. in c11
2773  for r11 in contacts_1[c11][c21]:
2774  # map to other one and skip if no contacts there
2775  if r11 not in mapped_residues[c11]: continue
2776  r1T = mapped_residues[c11][r11]
2777  # loop over res.num. in c21
2778  for r21 in contacts_1[c11][c21][r11]:
2779  # map to other one and skip if no contacts there
2780  if r21 not in mapped_residues[c21]: continue
2781  r2T = mapped_residues[c21][r21]
2782  # flip if needed
2783  if flipped_chains:
2784  r12, r22 = r2T, r1T
2785  else:
2786  r12, r22 = r1T, r2T
2787  # skip early if no contacts there
2788  if r12 not in contacts_2[c12][c22]: continue
2789  if r22 not in contacts_2[c12][c22][r12]: continue
2790  # ok now we can finally do the scoring
2791  d1 = contacts_1[c11][c21][r11][r21]
2792  d2 = contacts_2[c12][c22][r12][r22]
2793  weight = _weight(min(d1, d2))
2794  weighted_scores += weight * (1 - abs(d1 - d2) / 12)
2795  weight_sum += weight
2796  # keep track of done ones
2797  done_1.add((c11, c21, r11, r21))
2798  done_2.add((c12, c22, r12, r22))
2799 
2800  LogVerbose("Shared contacts: %d" % len(done_1))
2801 
2802  # add the extra weights
2803  weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
2804  mapped_residues_2 = dict()
2805  for c1 in mapped_residues:
2806  c2 = chain_mapping[c1]
2807  mapped_residues_2[c2] = set()
2808  for r1 in mapped_residues[c1]:
2809  mapped_residues_2[c2].add(mapped_residues[c1][r1])
2810  weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
2811  weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
2812  weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
2813 
2814  # get scores
2815  denom_best = weight_sum + weight_extra_mapped
2816  denom_all = weight_sum + weight_extra_all
2817  if denom_best == 0:
2818  QS_best = 0
2819  else:
2820  QS_best = weighted_scores / denom_best
2821  if denom_all == 0:
2822  QS_global = 0
2823  else:
2824  QS_global = weighted_scores / denom_all
2825  return QS_best, QS_global
2826 
2827 def _weight(dist):
2828  """
2829  This weight expresses the probability of two residues to interact given the CB
2830  distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
2831  """
2832  if dist <= 5.0:
2833  return 1
2834  x = (dist-5.0)/4.28;
2835  return np.exp(-2*x*x)
2836 
2837 
2838 def _GetQsSuperposition(alns):
2839  """
2840  :return: Superposition result based on shared CA atoms in *alns*
2841  (with views attached) (see :attr:`QSscorer.superposition`).
2842  :param alns: See :attr:`QSscorer.alignments`
2843  """
2844  # check input
2845  if not alns:
2846  raise QSscoreError('No successful alignments for superposition!')
2847  # prepare views
2848  view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
2849  view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
2850  # collect CA from alignments in proper order
2851  for aln in alns:
2852  for col in aln:
2853  res_1 = col.GetResidue(0)
2854  res_2 = col.GetResidue(1)
2855  if res_1.IsValid() and res_2.IsValid():
2856  ca_1 = res_1.FindAtom('CA')
2857  ca_2 = res_2.FindAtom('CA')
2858  if ca_1.IsValid() and ca_2.IsValid():
2859  view_1.AddAtom(ca_1)
2860  view_2.AddAtom(ca_2)
2861  # superpose them without chainging entities
2862  res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
2863  return res
2864 
2865 
2866 def _AddResidue(edi, res, rnum, chain, calpha_only):
2867  """
2868  Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
2869  Either all atoms added or (if *calpha_only*) only CA.
2870  """
2871  if calpha_only:
2872  ca_atom = res.FindAtom('CA')
2873  if ca_atom.IsValid():
2874  new_res = edi.AppendResidue(chain, res.name, rnum)
2875  edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
2876  else:
2877  new_res = edi.AppendResidue(chain, res.name, rnum)
2878  for atom in res.atoms:
2879  edi.InsertAtom(new_res, atom.name, atom.pos)
2880 
2881 def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only, penalize_extra_chains):
2882  """
2883  Create two new entities (based on the alignments attached views) where all
2884  residues have same numbering (when they're aligned) and they are all pushed to
2885  a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
2886  but not contained in *alns*.
2887 
2888  Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
2889 
2890  :param alns: List of alignments with attached views (first sequence: *ent_1*,
2891  second: *ent_2*). Residue number in single chain is column index
2892  of current alignment + sum of lengths of all previous alignments
2893  (order of alignments as in input list).
2894  :type alns: See :attr:`QSscorer.alignments`
2895  :param ent_1: First entity to process.
2896  :type ent_1: :class:`~ost.mol.EntityHandle`
2897  :param ent_2: Second entity to process.
2898  :type ent_2: :class:`~ost.mol.EntityHandle`
2899  :param calpha_only: If True, we only include CA atoms instead of all.
2900  :type calpha_only: :class:`bool`
2901  :param penalize_extra_chains: If True, extra chains are added to model and
2902  reference. Otherwise, only mapped ones.
2903  :type penalize_extra_chains: :class:`bool`
2904 
2905  :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
2906  :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
2907  """
2908  # first new entity
2909  ent_ren_1 = mol.CreateEntity()
2910  ed_1 = ent_ren_1.EditXCS()
2911  new_chain_1 = ed_1.InsertChain('X')
2912  # second one
2913  ent_ren_2 = mol.CreateEntity()
2914  ed_2 = ent_ren_2.EditXCS()
2915  new_chain_2 = ed_2.InsertChain('X')
2916  # the alignment already contains sorted chains
2917  rnum = 0
2918  chain_done_1 = set()
2919  chain_done_2 = set()
2920  for aln in alns:
2921  chain_done_1.add(aln.GetSequence(0).name)
2922  chain_done_2.add(aln.GetSequence(1).name)
2923  for col in aln:
2924  rnum += 1
2925  # add valid residues to single chain entities
2926  res_1 = col.GetResidue(0)
2927  if res_1.IsValid():
2928  _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
2929  res_2 = col.GetResidue(1)
2930  if res_2.IsValid():
2931  _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
2932  # extra chains?
2933  if penalize_extra_chains:
2934  for chain in ent_1.chains:
2935  if chain.name in chain_done_1:
2936  continue
2937  for res in chain.residues:
2938  rnum += 1
2939  _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
2940  for chain in ent_2.chains:
2941  if chain.name in chain_done_2:
2942  continue
2943  for res in chain.residues:
2944  rnum += 1
2945  _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
2946  # get entity names
2947  ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
2948  ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
2949  # connect atoms
2950  if not conop.GetDefaultLib():
2951  raise RuntimeError("QSscore computation requires a compound library!")
2952  pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
2953  pr.Process(ent_ren_1)
2954  ed_1.UpdateICS()
2955  pr.Process(ent_ren_2)
2956  ed_2.UpdateICS()
2957  return ent_ren_1, ent_ren_2
2958 
2959 
2960 # specify public interface
2961 __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
2962  'GetContacts', 'OligoLDDTScorer', 'MappedLDDTScorer')
def _ComputeAlignedEntities
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:546
def _GetSuperposeData
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:820
Mat4 DLLEXPORT_OST_GEOM Invert(const Mat4 &m)
def _InitScorer
Class internal helpers (anything that doesnt easily work without this class)
Definition: qsscoring.py:1408
Real Distance(const Vec3 &p1, const Vec3 &p2)
returns the distance between two points
Definition: vecmat3_op.hh:199
Three dimensional vector class, using Real precision.
Definition: vec3.hh:42
def _PrepareOligoEntities
Class internal helpers.
Definition: qsscoring.py:1210