00001 """
00002 Scoring of quaternary structures as in Martino's 2017 paper.
00003
00004 .. note ::
00005
00006 Requirements for use:
00007
00008 - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
00009 and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
00010 when executing scripts with ``ost``. Otherwise, you must set this with
00011 :func:`~ost.conop.SetDefaultLib`.
00012 - ClustalW must be installed (unless you provide chain mappings)
00013 - Python modules `numpy` and `scipy` must be installed and available
00014 (e.g. use ``pip install scipy numpy``)
00015
00016 Authors: Gerardo Tauriello, Martino Bertoni
00017 """
00018
00019 from ost import mol, geom, conop, seq, settings
00020 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
00021 from ost.bindings.clustalw import ClustalW
00022 import numpy as np
00023 from scipy.misc import factorial
00024 from scipy.special import binom
00025 from scipy.cluster.hierarchy import fclusterdata
00026 import itertools
00027
00028
00029
00030
00031
00032 class QSscoreError(Exception):
00033 """Exception to be raised for "acceptable" exceptions in QS scoring.
00034
00035 Those are cases we might want to capture for default behavior.
00036 """
00037 pass
00038
00039 class QSscorer:
00040 """Object to compute QS scores.
00041
00042 Simple usage without any precomputed contacts, symmetries and mappings:
00043
00044 .. code-block:: python
00045
00046 import ost
00047 from ost.mol.alg import qsscoring
00048
00049 # load two biounits to compare
00050 ent_full = ost.io.LoadPDB('3ia3', remote=True)
00051 ent_1 = ent_full.Select('cname=A,D')
00052 ent_2 = ent_full.Select('cname=B,C')
00053 # get score
00054 ost.PushVerbosityLevel(3)
00055 try:
00056 qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
00057 ost.LogScript('QSscore:', str(qs_scorer.global_score))
00058 ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
00059 except qsscoring.QSscoreError as ex:
00060 # default handling: report failure and set score to 0
00061 ost.LogError('QSscore failed:', str(ex))
00062 qs_score = 0
00063
00064 For maximal performance when computing QS scores of the same entity with many
00065 others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
00066
00067 Any known / precomputed information can be filled into the appropriate
00068 attribute here (no checks done!). Otherwise most quantities are computed on
00069 first access and cached (lazy evaluation). Setters are provided to set values
00070 with extra checks (e.g. :func:`SetSymmetries`).
00071
00072 All necessary seq. alignments are done by global BLOSUM62-based alignment. A
00073 multiple sequence alignment is performed with ClustalW unless
00074 :attr:`chain_mapping` is provided manually. You will need to have an
00075 executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
00076 :attr:`clustalw_bin` accordingly. Otherwise an exception
00077 (:class:`ost.settings.FileNotFound`) is thrown.
00078
00079 Formulas for QS scores:
00080
00081 ::
00082
00083 - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
00084 - QS_global = weighted_scores / (weight_sum + weight_extra_all)
00085 -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
00086 -> weight_sum = sum(w(min(d1,d2))) for shared
00087 -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
00088 -> weight_extra_all = sum(w(d)) for all non-shared
00089 -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
00090
00091 :param ent_1: First structure to be scored.
00092 :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
00093 :class:`~ost.mol.EntityView`
00094 :param ent_2: Second structure to be scored.
00095 :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
00096 :class:`~ost.mol.EntityView`
00097
00098 :raises: :class:`QSscoreError` if input structures are invalid or are monomers
00099 or have issues that make it impossible for a QS score to be computed.
00100
00101 .. attribute:: qs_ent_1
00102
00103 :class:`QSscoreEntity` object for *ent_1* given at construction.
00104 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
00105 set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
00106
00107 .. attribute:: qs_ent_2
00108
00109 :class:`QSscoreEntity` object for *ent_2* given at construction.
00110 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
00111 set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
00112
00113 .. attribute:: calpha_only
00114
00115 True if any of the two structures is CA-only (after cleanup).
00116
00117 :type: :class:`bool`
00118
00119 .. attribute:: max_ca_per_chain_for_cm
00120
00121 Maximal number of CA atoms to use in each chain to determine chain mappings.
00122 Setting this to -1 disables the limit. Limiting it speeds up determination
00123 of symmetries and chain mappings. By default it is set to 100.
00124
00125 :type: :class:`int`
00126 """
00127 def __init__(self, ent_1, ent_2):
00128
00129 if isinstance(ent_1, QSscoreEntity):
00130 self.qs_ent_1 = ent_1
00131 else:
00132 self.qs_ent_1 = QSscoreEntity(ent_1)
00133 if isinstance(ent_2, QSscoreEntity):
00134 self.qs_ent_2 = ent_2
00135 else:
00136 self.qs_ent_2 = QSscoreEntity(ent_2)
00137
00138 if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
00139 raise QSscoreError("Invalid input in QSscorer!")
00140
00141 if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
00142 self.qs_ent_1.SetName('pdb_1')
00143 self.qs_ent_2.SetName('pdb_2')
00144 else:
00145 self.qs_ent_1.SetName(self.qs_ent_1.original_name)
00146 self.qs_ent_2.SetName(self.qs_ent_2.original_name)
00147
00148 self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
00149 self.max_ca_per_chain_for_cm = 100
00150
00151 self._chem_mapping = None
00152 self._ent_to_cm_1 = None
00153 self._ent_to_cm_2 = None
00154 self._symm_1 = None
00155 self._symm_2 = None
00156 self._chain_mapping = None
00157 self._alignments = None
00158 self._mapped_residues = None
00159 self._global_score = None
00160 self._best_score = None
00161 self._superposition = None
00162 self._lddt_score = None
00163 self._lddt_mdl = None
00164 self._lddt_ref = None
00165 self._clustalw_bin = None
00166
00167 @property
00168 def chem_mapping(self):
00169 """Inter-complex mapping of chemical groups.
00170
00171 Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
00172 highest sequence identity. Alignment is between longest sequences in groups.
00173
00174 Limitations:
00175
00176 - If different numbers of groups, we map only the groups for the complex
00177 with less groups (rest considered unmapped and shown as warning)
00178 - The mapping is forced: the "best" mapping will be chosen independently of
00179 how low the seq. identity may be
00180
00181 :getter: Computed on first use (cached)
00182 :type: :class:`dict` with key = :class:`tuple` of chain names in
00183 :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
00184 :attr:`qs_ent_2`.
00185
00186 :raises: :class:`QSscoreError` if we end up having less than 2 chains for
00187 either entity in the mapping (can happen if chains do not have CA
00188 atoms).
00189 """
00190 if self._chem_mapping is None:
00191 self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2)
00192 return self._chem_mapping
00193
00194 @property
00195 def ent_to_cm_1(self):
00196 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
00197
00198 Properties:
00199
00200 - Includes only residues aligned according to :attr:`chem_mapping`
00201 - Includes only 1 CA atom per residue
00202 - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
00203 - All chains of the same chemical group have the same number of atoms
00204 (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
00205 - All chains appearing in :attr:`chem_mapping` appear in this entity
00206 (so the two can be safely used together)
00207
00208 This entity might be transformed (i.e. all positions rotated/translated by
00209 same transformation matrix) if this can speed up computations. So do not
00210 assume fixed global positions (but relative distances will remain fixed).
00211
00212 :getter: Computed on first use (cached)
00213 :type: :class:`~ost.mol.EntityHandle`
00214
00215 :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
00216 """
00217 if self._ent_to_cm_1 is None:
00218 self._ComputeAlignedEntities()
00219 return self._ent_to_cm_1
00220
00221 @property
00222 def ent_to_cm_2(self):
00223 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
00224 (see :attr:`ent_to_cm_1` for details).
00225 """
00226 if self._ent_to_cm_2 is None:
00227 self._ComputeAlignedEntities()
00228 return self._ent_to_cm_2
00229
00230 @property
00231 def symm_1(self):
00232 """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
00233
00234 This is a list of chain-lists where each chain-list can be used reconstruct
00235 the others via cyclic C or dihedral D symmetry. The first chain-list is used
00236 as a representative symmetry group. For heteromers, the group-members must
00237 contain all different seqres in oligomer.
00238
00239 Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
00240 symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
00241
00242 Properties:
00243
00244 - All symmetry group tuples have the same length (num. of chains)
00245 - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
00246 - For heteros: symmetry group tuples have all different chem. groups
00247 - Trivial symmetry group = one tuple with all chains (used if inconsistent
00248 data provided or if no symmetry is found)
00249 - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
00250 Compatibility requires same lengths of symmetry group tuples and it must
00251 be possible to get an overlap (80% of residues covered within 6 A of a
00252 (chem. mapped) chain) of all chains in representative symmetry groups by
00253 superposing one pair of chains.
00254
00255 :getter: Computed on first use (cached)
00256 :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
00257 """
00258 if self._symm_1 is None:
00259 self._ComputeSymmetry()
00260 return self._symm_1
00261
00262 @property
00263 def symm_2(self):
00264 """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
00265 if self._symm_2 is None:
00266 self._ComputeSymmetry()
00267 return self._symm_2
00268
00269 def SetSymmetries(self, symm_1, symm_2):
00270 """Set user-provided symmetry groups.
00271
00272 These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
00273 and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
00274 chains and both *symm_1* and *symm_2* have same lengths of symmetry group
00275 tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
00276
00277 :param symm_1: Value to set for :attr:`symm_1`.
00278 :param symm_2: Value to set for :attr:`symm_2`.
00279 """
00280
00281 self._symm_1 = _CleanUserSymmetry(symm_1, self.ent_to_cm_1)
00282 self._symm_2 = _CleanUserSymmetry(symm_2, self.ent_to_cm_2)
00283
00284 if not _AreValidSymmetries(self._symm_1, self._symm_2):
00285 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
00286 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
00287
00288 @property
00289 def chain_mapping(self):
00290 """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
00291
00292 Properties:
00293
00294 - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
00295 - Each chain can appear only once in mapping
00296 - All chains of complex with less chains are mapped
00297 - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
00298
00299 Details on algorithms used to find mapping:
00300
00301 - We try all pairs of chem. mapped chains within symmetry group and get
00302 superpose-transformation for them
00303 - First option: check for "sufficient overlap" of other chain-pairs
00304
00305 - For each chain-pair defined above: apply superposition to full oligomer
00306 and map chains based on structural overlap
00307 - Structural overlap = X% of residues in second oligomer covered within Y
00308 Angstrom of a (chem. mapped) chain in first oligomer. We successively
00309 try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
00310 mapping (warning shown for most permissive one).
00311 - If multiple possible mappings are found, we choose the one which leads
00312 to the lowest multi-chain-RMSD given the superposition
00313
00314 - Fallback option: try all mappings to find minimal multi-chain-RMSD
00315 (warning shown)
00316
00317 - For each chain-pair defined above: apply superposition, try all (!)
00318 possible chain mappings (within symmetry group) and keep mapping with
00319 lowest multi-chain-RMSD
00320 - Repeat procedure above to resolve symmetry. Within the symmetry group we
00321 can use the chain mapping computed before and we just need to find which
00322 symmetry group in first oligomer maps to which in the second one. We
00323 again try all possible combinations...
00324 - Limitations:
00325
00326 - Trying all possible mappings is a combinatorial nightmare (factorial).
00327 We throw an exception if too many combinations (e.g. octomer vs
00328 octomer with no usable symmetry)
00329 - The mapping is forced: the "best" mapping will be chosen independently
00330 of how badly they fit in terms of multi-chain-RMSD
00331 - As a result, such a forced mapping can lead to a large range of
00332 resulting QS scores. An extreme example was observed between 1on3.1
00333 and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
00334 for mappings with very similar multi-chain-RMSD.
00335
00336 :getter: Computed on first use (cached)
00337 :type: :class:`dict` with key / value = :class:`str` (chain names, key
00338 for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
00339 :raises: :class:`QSscoreError` if there are too many combinations to check
00340 to find a chain mapping.
00341 """
00342 if self._chain_mapping is None:
00343 self._chain_mapping = _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2,
00344 self.symm_1, self.symm_2,
00345 self.chem_mapping)
00346 LogInfo('Mapping found: %s' % str(self._chain_mapping))
00347 return self._chain_mapping
00348
00349 @property
00350 def alignments(self):
00351 """List of successful sequence alignments using :attr:`chain_mapping`.
00352
00353 There will be one alignment for each mapped chain and they are ordered by
00354 their chain names in :attr:`qs_ent_1`.
00355
00356 The sequences of the alignments have views attached into
00357 :attr:`QSscoreEntity.ent` of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
00358
00359 :getter: Computed on first use (cached)
00360 :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
00361 """
00362 if self._alignments is None:
00363 self._alignments = _GetMappedAlignments(self.qs_ent_1.ent,
00364 self.qs_ent_2.ent,
00365 self.chain_mapping)
00366 return self._alignments
00367
00368 @property
00369 def mapped_residues(self):
00370 """Mapping of shared residues in :attr:`alignments`.
00371
00372 :getter: Computed on first use (cached)
00373 :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
00374 *c1* = Chain name in first entity (= first sequence in aln),
00375 *r1* = Residue number in first entity,
00376 *r2* = Residue number in second entity
00377 """
00378 if self._mapped_residues is None:
00379 self._mapped_residues = _GetMappedResidues(self.alignments)
00380 return self._mapped_residues
00381
00382 @property
00383 def global_score(self):
00384 """QS-score with penalties.
00385
00386 The range of the score is between 0 (i.e. no interface residues are shared
00387 between biounits) and 1 (i.e. the interfaces are identical).
00388
00389 The global QS-score is computed applying penalties when interface residues
00390 or entire chains are missing (i.e. anything that is not mapped in
00391 :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
00392
00393 :getter: Computed on first use (cached)
00394 :type: :class:`float`
00395 """
00396 if self._global_score is None:
00397 self._ComputeScores()
00398 return self._global_score
00399
00400 @property
00401 def best_score(self):
00402 """QS-score without penalties.
00403
00404 Like :attr:`global_score`, but neglecting additional residues or chains in
00405 one of the biounits (i.e. the score is calculated considering only mapped
00406 chains and residues).
00407
00408 :getter: Computed on first use (cached)
00409 :type: :class:`float`
00410 """
00411 if self._best_score is None:
00412 self._ComputeScores()
00413 return self._best_score
00414
00415 @property
00416 def superposition(self):
00417 """Superposition result based on shared CA atoms in :attr:`alignments`.
00418
00419 The superposition can be used to map :attr:`QSscoreEntity.ent` of
00420 :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
00421 :func:`ost.geom.Invert` if you need the opposite transformation.
00422
00423 :getter: Computed on first use (cached)
00424 :type: :class:`ost.mol.alg.SuperpositionResult`
00425 """
00426 if self._superposition is None:
00427 self._superposition = _GetQsSuperposition(self.alignments)
00428
00429 sup_rmsd = self._superposition.rmsd
00430 cmp_view = self._superposition.view1
00431 LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
00432 % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
00433 return self._superposition
00434
00435 @property
00436 def lddt_score(self):
00437 """The multi-chain lDDT score.
00438
00439 .. note::
00440
00441 lDDT is not considering over-prediction (i.e. extra chains) and hence is
00442 not symmetric. Here, we consider :attr:`qs_ent_1` as the reference and
00443 :attr:`qs_ent_2` as the model. The alignments from :attr:`alignments` are
00444 used to map residue numbers and chains.
00445
00446 The score is computed with OST's :func:`~ost.mol.alg.LocalDistDiffTest`
00447 function with a single distance threshold of 2 A and an inclusion radius of
00448 8 A. You can use :attr:`lddt_mdl` and :attr:`lddt_ref` to get entities on
00449 which you can call any other lDDT function with any other set of parameters.
00450
00451 :getter: Computed on first use (cached)
00452 :type: :class:`float`
00453 """
00454 if self._lddt_score is None:
00455 self._ComputeLDDT()
00456 return self._lddt_score
00457
00458 @property
00459 def lddt_mdl(self):
00460 """The model entity used for lDDT scoring (:attr:`lddt_score`) and annotated
00461 with local scores.
00462
00463 Local scores are available as residue properties named 'lddt' and on each
00464 atom as a B-factor. Only CA atoms are considered if :attr:`calpha_only` is
00465 True, otherwise this is an all-atom score.
00466
00467 Since, the lDDT computation requires a single chain with mapped residue
00468 numbering, all chains are appended into a single chain X with unique residue
00469 numbers according to the column-index in the alignment. The alignments are
00470 in the same order as they appear in :attr:`alignments`. Additional residues
00471 are appended at the end of the chain with unique residue numbers.
00472
00473 :getter: Computed on first use (cached)
00474 :type: :class:`~ost.mol.EntityHandle`
00475 """
00476 if self._lddt_mdl is None:
00477 self._ComputeLDDT()
00478 return self._lddt_mdl
00479
00480 @property
00481 def lddt_ref(self):
00482 """The reference entity used for lDDT scoring (:attr:`lddt_score`).
00483
00484 This is a single chain X with residue numbers matching ones in
00485 :attr:`lddt_mdl` where aligned and unique numbers for additional residues.
00486
00487 :getter: Computed on first use (cached)
00488 :type: :class:`~ost.mol.EntityHandle`
00489 """
00490 if self._lddt_ref is None:
00491 self._ComputeLDDT()
00492 return self._lddt_ref
00493
00494 @property
00495 def clustalw_bin(self):
00496 """
00497 Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
00498 sequence alignments (unless :attr:`chain_mapping` is provided manually).
00499
00500 :getter: Located in path on first use (cached)
00501 :type: :class:`str`
00502 """
00503 if self._clustalw_bin is None:
00504 self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
00505 return self._clustalw_bin
00506
00507
00508
00509
00510
00511 def _ComputeAlignedEntities(self):
00512 """Fills cached ent_to_cm_1 and ent_to_cm_2."""
00513
00514 ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
00515 self.chem_mapping,
00516 self.max_ca_per_chain_for_cm,
00517 self.clustalw_bin)
00518
00519 self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
00520 self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
00521
00522 self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
00523 self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
00524
00525 def _ComputeSymmetry(self):
00526 """Fills cached symm_1 and symm_2."""
00527
00528 self._symm_1, self._symm_2 = \
00529 _FindSymmetry(self.qs_ent_1, self.qs_ent_2, self.ent_to_cm_1,
00530 self.ent_to_cm_2, self.chem_mapping)
00531
00532 if not _AreValidSymmetries(self._symm_1, self._symm_2):
00533 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
00534 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
00535
00536 def _ComputeScores(self):
00537 """Fills cached global_score and best_score."""
00538
00539 if self.calpha_only:
00540 contacts_1 = self.qs_ent_1.contacts_ca
00541 contacts_2 = self.qs_ent_2.contacts_ca
00542 else:
00543 contacts_1 = self.qs_ent_1.contacts
00544 contacts_2 = self.qs_ent_2.contacts
00545
00546 scores = _GetScores(contacts_1, contacts_2, self.mapped_residues,
00547 self.chain_mapping)
00548 self._best_score = scores[0]
00549 self._global_score = scores[1]
00550
00551 LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
00552 % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
00553 self._best_score, self._global_score))
00554
00555 def _ComputeLDDT(self):
00556 """Fills cached lddt_score, lddt_mdl and lddt_ref."""
00557 LogInfo('Computing lDDT score')
00558
00559 ref, mdl = self.qs_ent_1.ent, self.qs_ent_2.ent
00560 LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count))
00561 LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count))
00562 if mdl.chain_count > ref.chain_count:
00563 LogWarning('MODEL contains more chains than REFERENCE, '
00564 'lDDT is not considering them')
00565
00566 self._lddt_ref, self._lddt_mdl = \
00567 _MergeAlignedChains(self.alignments, ref, mdl, self.calpha_only)
00568
00569 self._lddt_score = _ComputeLDDTScore(self._lddt_ref, self._lddt_mdl)
00570
00571
00572
00573
00574
00575
00576 class QSscoreEntity(object):
00577 """Entity with cached entries for QS scoring.
00578
00579 Any known / precomputed information can be filled into the appropriate
00580 attribute here as long as they are labelled as read/write. Otherwise the
00581 quantities are computed on first access and cached (lazy evaluation). The
00582 heaviest load is expected when computing :attr:`contacts` and
00583 :attr:`contacts_ca`.
00584
00585 :param ent: Entity to be used for QS scoring. A copy of it will be processed.
00586 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
00587
00588 .. attribute:: is_valid
00589
00590 True, if successfully initialized. False, if input structure is monomer or
00591 has less than 2 protein chains with >= 20 residues.
00592
00593 :type: :class:`bool`
00594
00595 .. attribute:: original_name
00596
00597 Name set for *ent* when object was created.
00598
00599 :type: :class:`str`
00600
00601 .. attribute:: ent
00602
00603 Cleaned version of *ent* passed at construction. Hydrogens are removed, the
00604 entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
00605 listed in :attr:`removed_chains` have been removed. The name of this entity
00606 might change during scoring (see :func:`GetName`). Otherwise, this will be
00607 fixed.
00608
00609 :type: :class:`~ost.mol.EntityHandle`
00610
00611 .. attribute:: removed_chains
00612
00613 Chains removed from *ent* passed at construction. These are ligand and water
00614 chains as well as small (< 20 res.) peptides or chains with no amino acids
00615 (determined by chem. type, which is set by rule based processor).
00616
00617 :type: :class:`list` of :class:`str`
00618
00619 .. attribute:: calpha_only
00620
00621 Whether entity is CA-only (i.e. it has 0 CB atoms)
00622
00623 :type: :class:`bool`
00624 """
00625 def __init__(self, ent):
00626
00627 self.original_name = ent.GetName()
00628 ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
00629 if not conop.GetDefaultLib():
00630 raise RuntimeError("QSscore computation requires a compound library!")
00631 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
00632 pr.Process(ent)
00633 self.ent, self.removed_chains, self.calpha_only = _CleanInputEntity(ent)
00634
00635 if self.ent.chain_count == 0:
00636 LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
00637 'removing water, ligands and small peptides.')
00638 self.is_valid = False
00639 elif self.ent.chain_count == 1:
00640 LogError('Structure ' + ent.GetName() + ' is a monomer. '
00641 'QSscore is not defined for monomers.')
00642 self.is_valid = False
00643 else:
00644 self.is_valid = True
00645
00646 self._ca_entity = None
00647 self._ca_chains = None
00648 self._alignments = {}
00649 self._chem_groups = None
00650 self._angles = {}
00651 self._axis = {}
00652 self._contacts = None
00653 self._contacts_ca = None
00654
00655 def GetName(self):
00656 """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
00657 This is used to uniquely identify the entity while scoring. The name may
00658 therefore change while :attr:`original_name` remains fixed.
00659 """
00660
00661 return self.ent.GetName()
00662
00663 def SetName(self, new_name):
00664 """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
00665 Use this to change unique identifier while scoring (see :func:`GetName`).
00666 """
00667
00668 self.ent.SetName(new_name)
00669
00670 @property
00671 def ca_entity(self):
00672 """
00673 Reduced representation of :attr:`ent` with only CA atoms.
00674 This guarantees that each included residue has exactly one atom.
00675
00676 :getter: Computed on first use (cached)
00677 :type: :class:`~ost.mol.EntityHandle`
00678 """
00679 if self._ca_entity is None:
00680 self._ca_entity = _GetCAOnlyEntity(self.ent)
00681 return self._ca_entity
00682
00683 @property
00684 def ca_chains(self):
00685 """
00686 Map of chain names in :attr:`ent` to sequences with attached view to CA-only
00687 chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
00688
00689 :getter: Computed on first use (cached)
00690 :type: :class:`dict` (key = :class:`str`,
00691 value = :class:`~ost.seq.SequenceHandle`)
00692 """
00693 if self._ca_chains is None:
00694 self._ca_chains = dict()
00695 ca_entity = self.ca_entity
00696 for ch in ca_entity.chains:
00697 self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
00698 return self._ca_chains
00699
00700 def GetAlignment(self, c1, c2):
00701 """Get sequence alignment of chain *c1* with chain *c2*.
00702 Computed on first use based on :attr:`ca_chains` (cached).
00703
00704 :param c1: Chain name for first chain to align.
00705 :type c1: :class:`str`
00706 :param c2: Chain name for second chain to align.
00707 :type c2: :class:`str`
00708 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
00709 """
00710 if (c1,c2) not in self._alignments:
00711 ca_chains = self.ca_chains
00712 self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
00713 return self._alignments[(c1,c2)]
00714
00715 @property
00716 def chem_groups(self):
00717 """
00718 Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
00719 chains as extracted from :attr:`ca_chains`. First chain in group is the one
00720 with the longest sequence.
00721
00722 :getter: Computed on first use (cached)
00723 :type: :class:`list` of :class:`list` of :class:`str` (chain names)
00724 """
00725 if self._chem_groups is None:
00726 self._chem_groups = _GetChemGroups(self, 95)
00727 LogInfo('Chemically equivalent chain-groups in %s: %s' \
00728 % (self.GetName(), str(self._chem_groups)))
00729 return self._chem_groups
00730
00731 def GetAngles(self, c1, c2):
00732 """Get Euler angles from superposition of chain *c1* with chain *c2*.
00733 Computed on first use based on :attr:`ca_chains` (cached).
00734
00735 :param c1: Chain name for first chain to superpose.
00736 :type c1: :class:`str`
00737 :param c2: Chain name for second chain to superpose.
00738 :type c2: :class:`str`
00739 :return: 3 Euler angles (may contain nan if something fails).
00740 :rtype: :class:`numpy.array`
00741 """
00742 if (c1,c2) not in self._angles:
00743 self._GetSuperposeData(c1, c2)
00744 return self._angles[(c1,c2)]
00745
00746 def GetAxis(self, c1, c2):
00747 """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
00748 Computed on first use based on :attr:`ca_chains` (cached).
00749
00750 :param c1: Chain name for first chain to superpose.
00751 :type c1: :class:`str`
00752 :param c2: Chain name for second chain to superpose.
00753 :type c2: :class:`str`
00754 :return: Rotational axis (may contain nan if something fails).
00755 :rtype: :class:`numpy.array`
00756 """
00757 if (c1,c2) not in self._axis:
00758 self._GetSuperposeData(c1, c2)
00759 return self._axis[(c1,c2)]
00760
00761 @property
00762 def contacts(self):
00763 """
00764 Connectivity dictionary (**read/write**).
00765 As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
00766
00767 :getter: Computed on first use (cached)
00768 :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
00769 for chains in the cleaned entity.
00770 :type: See return type of :func:`GetContacts`
00771 """
00772 if self._contacts is None:
00773 self._contacts = GetContacts(self.ent, False)
00774 return self._contacts
00775
00776 @contacts.setter
00777 def contacts(self, new_contacts):
00778 chain_names = set([ch.name for ch in self.ent.chains])
00779 self._contacts = FilterContacts(new_contacts, chain_names)
00780
00781 @property
00782 def contacts_ca(self):
00783 """
00784 CA-only connectivity dictionary (**read/write**).
00785 Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
00786 """
00787 if self._contacts_ca is None:
00788 self._contacts_ca = GetContacts(self.ent, True)
00789 return self._contacts_ca
00790
00791 @contacts_ca.setter
00792 def contacts_ca(self, new_contacts):
00793 chain_names = set([ch.name for ch in self.ent.chains])
00794 self._contacts_ca = FilterContacts(new_contacts, chain_names)
00795
00796
00797
00798
00799
00800 def _GetSuperposeData(self, c1, c2):
00801 """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
00802
00803 aln = self.GetAlignment(c1, c2)
00804 if not aln:
00805
00806 self._angles[(c1,c2)] = np.empty(3) * np.nan
00807 self._axis[(c1,c2)] = np.empty(3) * np.nan
00808 return
00809 v1, v2 = seq.ViewsFromAlignment(aln)
00810 if v1.atom_count < 3:
00811
00812 self._angles[(c1,c2)] = np.empty(3) * np.nan
00813 self._axis[(c1,c2)] = np.empty(3) * np.nan
00814 return
00815
00816 sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
00817 Rt = sup_res.transformation
00818
00819 a,b,c = _GetAngles(Rt)
00820 self._angles[(c1,c2)] = np.asarray([a,b,c])
00821
00822 R3 = geom.Rotation3(Rt.ExtractRotation())
00823 self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
00824
00825
00826
00827
00828
00829 def FilterContacts(contacts, chain_names):
00830 """Filter contacts to contain only contacts for chains in *chain_names*.
00831
00832 :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
00833 :type contacts: :class:`dict`
00834 :param chain_names: Chain names to keep.
00835 :type chain_names: :class:`list` or (better) :class:`set`
00836 :return: New connectivity dictionary (format as in :func:`GetContacts`)
00837 :rtype: :class:`dict`
00838 """
00839
00840 filtered_contacts = dict()
00841 for c1 in contacts:
00842 if c1 in chain_names:
00843 new_contacts = dict()
00844 for c2 in contacts[c1]:
00845 if c2 in chain_names:
00846 new_contacts[c2] = contacts[c1][c2]
00847
00848 if new_contacts:
00849 filtered_contacts[c1] = new_contacts
00850 return filtered_contacts
00851
00852 def GetContacts(entity, calpha_only, dist_thr=12.0):
00853 """Get inter-chain contacts of a macromolecular entity.
00854
00855 Contacts are pairs of residues within a given distance belonging to different
00856 chains. They are stored once per pair and include the CA/CB-CA/CB distance.
00857
00858 :param entity: An entity to check connectivity for.
00859 :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
00860 :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
00861 unless the residue is a GLY.
00862 :type calpha_only: :class:`bool`
00863 :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
00864 :type dist_thr: :class:`float`
00865 :return: A connectivity dictionary. A pair of residues with chain names
00866 *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
00867 *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
00868 stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
00869 :rtype: :class:`dict`
00870 """
00871
00872 if calpha_only:
00873 ev = entity.Select("aname=CA")
00874 else:
00875 ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
00876 ent = mol.CreateEntityFromView(ev, True)
00877
00878 contacts = dict()
00879 for atom in ent.atoms:
00880 ch_name1 = atom.chain.name
00881 res_num1 = atom.residue.number.num
00882 close_atoms = ent.FindWithin(atom.pos, dist_thr)
00883 for close_atom in close_atoms:
00884 ch_name2 = close_atom.chain.name
00885 if ch_name2 > ch_name1:
00886 res_num2 = close_atom.residue.number.num
00887 dist = geom.Distance(atom.pos, close_atom.pos)
00888
00889 if ch_name1 not in contacts:
00890 contacts[ch_name1] = dict()
00891 if ch_name2 not in contacts[ch_name1]:
00892 contacts[ch_name1][ch_name2] = dict()
00893 if res_num1 not in contacts[ch_name1][ch_name2]:
00894 contacts[ch_name1][ch_name2][res_num1] = dict()
00895 contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
00896
00897 return contacts
00898
00899
00900
00901
00902
00903
00904
00905
00906 def _AlignAtomSeqs(seq_1, seq_2):
00907 """
00908 :type seq_1: :class:`ost.seq.SequenceHandle`
00909 :type seq_2: :class:`ost.seq.SequenceHandle`
00910 :return: Alignment of two sequences using a global aignment. Views attached
00911 to the input sequences will remain attached in the aln.
00912 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
00913 """
00914
00915
00916 aln = None
00917 alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
00918 if alns: aln = alns[0]
00919 if not aln:
00920 LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
00921 LogWarning('%s: %s' % (seq_1.name, seq_1.string))
00922 LogWarning('%s: %s' % (seq_2.name, seq_2.string))
00923 return aln
00924
00925
00926
00927 def _CleanInputEntity(ent):
00928 """
00929 :param ent: The OST entity to be cleaned.
00930 :type ent: :class:`EntityHandle` or :class:`EntityView`
00931 :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
00932 :attr:`QSscoreEntity.removed_chains`,
00933 :attr:`QSscoreEntity.calpha_only`
00934 """
00935
00936 removed_chains = []
00937 for ch in ent.chains:
00938
00939
00940 if ch.name in ['-', '_'] \
00941 or ch.residue_count < 20 \
00942 or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
00943 or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
00944 removed_chains.append(ch.name)
00945
00946
00947 if removed_chains:
00948 chain_set = set()
00949 for ch_name in removed_chains:
00950 if ch_name in ['-', '_', ' ']:
00951 chain_set.add('"%c"' % ch_name)
00952 else:
00953 chain_set.add(ch_name)
00954 view = ent.Select('cname!=%s' % ','.join(chain_set))
00955 ent_new = mol.CreateEntityFromView(view, True)
00956 ent_new.SetName(ent.GetName())
00957 else:
00958 ent_new = ent
00959
00960
00961 calpha_only = False
00962 if ent_new.Select('aname=CB').atom_count == 0:
00963 LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
00964 calpha_only = True
00965
00966
00967 if removed_chains:
00968 LogInfo('Chains removed from %s: %s' \
00969 % (ent_new.GetName(), ''.join(removed_chains)))
00970 LogInfo('Chains in %s: %s' \
00971 % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
00972 return ent_new, removed_chains, calpha_only
00973
00974 def _GetCAOnlyEntity(ent):
00975 """
00976 :param ent: Entity to process
00977 :type ent: :class:`EntityHandle` or :class:`EntityView`
00978 :return: New entity with only CA and only one atom per residue
00979 (see :attr:`QSscoreEntity.ca_entity`)
00980 """
00981
00982 ca_view = ent.CreateEmptyView()
00983
00984 for res in ent.residues:
00985 ca_atom = res.FindAtom("CA")
00986 if ca_atom.IsValid():
00987 ca_view.AddAtom(ca_atom)
00988
00989 return mol.CreateEntityFromView(ca_view, False)
00990
00991 def _GetChemGroups(qs_ent, seqid_thr=95.):
00992 """
00993 :return: Intra-complex group of chemically identical polypeptide chains
00994 (see :attr:`QSscoreEntity.chem_groups`)
00995
00996 :param qs_ent: Entity to process
00997 :type qs_ent: :class:`QSscoreEntity`
00998 :param seqid_thr: Threshold used to decide when two chains are identical.
00999 95 percent tolerates the few mutations crystallographers
01000 like to do.
01001 :type seqid_thr: :class:`float`
01002 """
01003
01004 ca_chains = qs_ent.ca_chains
01005 chain_names = sorted(ca_chains.keys())
01006
01007
01008
01009 id_seqs = []
01010 for ch_1, ch_2 in itertools.combinations(chain_names, 2):
01011 aln = qs_ent.GetAlignment(ch_1, ch_2)
01012 if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
01013 id_seqs.append((ch_1, ch_2))
01014
01015 if not id_seqs:
01016 return [[name] for name in chain_names]
01017
01018
01019 groups = []
01020 for ch_1, ch_2 in id_seqs:
01021 found = False
01022 for g in groups:
01023 if ch_1 in g or ch_2 in g:
01024 found = True
01025 g.add(ch_1)
01026 g.add(ch_2)
01027 if not found:
01028 groups.append(set([ch_1, ch_2]))
01029
01030 chem_groups = []
01031 for g in groups:
01032 ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
01033 chem_groups.append([ch for _,ch in ranked_g])
01034
01035 for ch in chain_names:
01036 if not any(ch in g for g in chem_groups):
01037 chem_groups.append([ch])
01038
01039 return chem_groups
01040
01041 def _GetAngles(Rt):
01042 """Computes the Euler angles given a transformation matrix.
01043
01044 :param Rt: Rt operator.
01045 :type Rt: :class:`ost.geom.Mat4`
01046 :return: A :class:`tuple` of angles for each axis (x,y,z)
01047 """
01048 rot = np.asmatrix(Rt.ExtractRotation().data).reshape(3,3)
01049 tx = np.arctan2(rot[2,1], rot[2,2])
01050 if tx < 0:
01051 tx += 2*np.pi
01052 ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
01053 if ty < 0:
01054 ty += 2*np.pi
01055 tz = np.arctan2(rot[1,0], rot[0,0])
01056 if tz < 0:
01057 tz += 2*np.pi
01058 return tx,ty,tz
01059
01060
01061
01062 def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
01063 """
01064 :return: Inter-complex mapping of chemical groups
01065 (see :attr:`QSscorer.chem_mapping`)
01066
01067 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01068 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01069 """
01070
01071 chem_groups_1 = qs_ent_1.chem_groups
01072 chem_groups_2 = qs_ent_2.chem_groups
01073 repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
01074 repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
01075
01076
01077
01078 swapped = False
01079 if len(repr_chains_2) < len(repr_chains_1):
01080 repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
01081 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
01082 swapped = True
01083
01084
01085
01086
01087
01088
01089
01090 chain_pairs = []
01091 ca_chains_1 = qs_ent_1.ca_chains
01092 ca_chains_2 = qs_ent_2.ca_chains
01093 for ch_1 in repr_chains_1.keys():
01094 for ch_2 in repr_chains_2.keys():
01095 aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
01096 if aln:
01097 chains_seqid = seq.alg.SequenceIdentity(aln)
01098 LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
01099 chain_pairs.append((chains_seqid, ch_1, ch_2))
01100
01101
01102 chain_pairs = sorted(chain_pairs, reverse=True)
01103 chem_mapping = {}
01104 for _, c1, c2 in chain_pairs:
01105 skip = False
01106 for a,b in chem_mapping.iteritems():
01107 if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
01108 skip = True
01109 break
01110 if not skip:
01111 chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
01112 if swapped:
01113 chem_mapping = {y: x for x, y in chem_mapping.iteritems()}
01114 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
01115
01116
01117 mapped_1 = set([i for s in chem_mapping.keys() for i in s])
01118 chains_1 = set([c.name for c in qs_ent_1.ent.chains])
01119 if chains_1 - mapped_1:
01120 LogWarning('Unmapped Chains in %s: %s'
01121 % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
01122
01123 mapped_2 = set([i for s in chem_mapping.values() for i in s])
01124 chains_2 = set([c.name for c in qs_ent_2.ent.chains])
01125 if chains_2 - mapped_2:
01126 LogWarning('Unmapped Chains in %s: %s'
01127 % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
01128
01129
01130 LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
01131 if len(mapped_1) < 2 or len(mapped_2) < 2:
01132 raise QSscoreError('Less than 2 chains left in chem_mapping.')
01133 return chem_mapping
01134
01135 def _SelectFew(l, max_elements):
01136 """Return l or copy of l with at most *max_elements* entries."""
01137 n_el = len(l)
01138 if n_el <= max_elements:
01139 return l
01140 else:
01141
01142 d_el = ((n_el - 1) // max_elements) + 1
01143 new_l = list()
01144 for i in range(0, n_el, d_el):
01145 new_l.append(l[i])
01146 return new_l
01147
01148 def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
01149 clustalw_bin):
01150 """
01151 :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
01152 of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
01153 those views (see :attr:`QSscorer.ent_to_cm_1` and
01154 :attr:`QSscorer.ent_to_cm_2`)
01155
01156 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01157 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01158 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01159 :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
01160 """
01161
01162 ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
01163 ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
01164 ca_chains_1 = qs_ent_1.ca_chains
01165 ca_chains_2 = qs_ent_2.ca_chains
01166
01167 for group_1, group_2 in chem_mapping.iteritems():
01168
01169 seq_list = seq.CreateSequenceList()
01170
01171 seq_to_empty_view = dict()
01172 for ch in group_1:
01173 sequence = ca_chains_1[ch].Copy()
01174 sequence.name = qs_ent_1.GetName() + '.' + ch
01175 seq_to_empty_view[sequence.name] = ent_view_1
01176 seq_list.AddSequence(sequence)
01177 for ch in group_2:
01178 sequence = ca_chains_2[ch].Copy()
01179 sequence.name = qs_ent_2.GetName() + '.' + ch
01180 seq_to_empty_view[sequence.name] = ent_view_2
01181 seq_list.AddSequence(sequence)
01182 alnc = ClustalW(seq_list, clustalw=clustalw_bin)
01183
01184
01185 residue_lists = list()
01186 sequence_count = alnc.sequence_count
01187 for col in alnc:
01188 residues = [col.GetResidue(ir) for ir in range(sequence_count)]
01189 if all([r.IsValid() for r in residues]):
01190 residue_lists.append(residues)
01191
01192 if len(residue_lists) < 5:
01193 raise QSscoreError('Not enough aligned residues.')
01194 elif max_ca_per_chain > 0:
01195 residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
01196
01197 res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
01198 for ir in range(sequence_count)]
01199
01200 for res_list in residue_lists:
01201 for res, view in zip(res_list, res_views):
01202 view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
01203
01204 return ent_view_1, ent_view_2
01205
01206
01207 def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
01208 """
01209 :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
01210 and :attr:`QSscorer.symm_2`) between the two structures.
01211 Empty lists if no symmetry identified.
01212
01213 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01214 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01215 :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
01216 :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
01217 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01218 """
01219 LogInfo('Identifying Symmetry Groups...')
01220
01221
01222 symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
01223 chem_mapping.keys())
01224 symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
01225 chem_mapping.values())
01226
01227
01228 LogInfo('Selecting Symmetry Groups...')
01229
01230
01231
01232
01233 best_symm = []
01234 for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
01235 s1 = symm_1[0]
01236 s2 = symm_2[0]
01237 if len(s1) != len(s2):
01238 LogDebug('Discarded', str(symm_1), str(symm_2),
01239 ': different size of symmetry groups')
01240 continue
01241 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
01242 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
01243 LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
01244 best_symm.append((nr_mapp, symm_1, symm_2))
01245
01246 for _, symm_1, symm_2 in sorted(best_symm):
01247 s1 = symm_1[0]
01248 s2 = symm_2[0]
01249 group_1 = ent_to_cm_1.Select('cname=%s' % ','.join(s1))
01250 group_2 = ent_to_cm_2.Select('cname=%s' % ','.join(s2))
01251
01252
01253
01254 closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
01255 chem_mapping, 6, 0.8, find_best=False)
01256
01257 if closed_symm:
01258 return symm_1, symm_2
01259
01260
01261 LogInfo('No coherent symmetry identified between structures')
01262 return [], []
01263
01264
01265 def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping):
01266 """
01267 :return: Mapping from *ent_1* to *ent_2* (see :attr:`QSscorer.chain_mapping`)
01268
01269 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
01270 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
01271 :param symm_1: See :attr:`QSscorer.symm_1`
01272 :param symm_2: See :attr:`QSscorer.symm_2`
01273 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01274 """
01275 LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
01276 LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
01277
01278
01279 thresholds = [( 'strict', 4, 0.8),
01280 ( 'tolerant', 6, 0.4),
01281 ('permissive', 8, 0.2)]
01282 for scheme, sup_thr, sup_fract in thresholds:
01283
01284
01285
01286 mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
01287 chem_mapping, sup_thr, sup_fract)
01288 if mapping:
01289 LogInfo('Closed Symmetry with %s parameters' % scheme)
01290 if scheme == 'permissive':
01291 LogWarning('Permissive thresholds used for overlap based mapping ' + \
01292 'detection: check mapping manually: %s' % mapping)
01293 return mapping
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
01308 LogInfo('Intra Symmetry-group mappings to check: %s' \
01309 % count['intra']['mappings'])
01310 LogInfo('Inter Symmetry-group mappings to check: %s' \
01311 % count['inter']['mappings'])
01312 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
01313 if nr_mapp > 100000:
01314 raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
01315
01316
01317 cached_rmsd = _CachedRMSD(ent_1, ent_2)
01318
01319
01320 check = 0
01321 intra_mappings = []
01322
01323 ref_symm_1 = symm_1[0]
01324 ref_symm_2 = symm_2[0]
01325 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01326
01327 for g1, g2 in asu_chem_mapping.iteritems():
01328
01329 for c1, c2 in itertools.product(g1, g2):
01330
01331 LogVerbose('Superposing chains: %s, %s' % (c1, c2))
01332 res = cached_rmsd.GetSuperposition(c1, c2)
01333
01334 cur_mappings = []
01335 for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
01336 check += 1
01337 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
01338 cur_mappings.append((chains_rmsd, mapping))
01339 LogVerbose(str(mapping), str(chains_rmsd))
01340 best_rmsd, best_mapp = min(cur_mappings)
01341 intra_mappings.append((best_rmsd, c1, c2, best_mapp))
01342
01343 _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
01344
01345
01346 if len(symm_1) == 1 or len(symm_2) == 1:
01347 mapping = intra_asu_mapp
01348 else:
01349
01350
01351
01352 index_asu_c1 = ref_symm_1.index(intra_asu_c1)
01353 index_asu_c2 = ref_symm_2.index(intra_asu_c2)
01354 index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
01355 for s1, s2 in intra_asu_mapp.iteritems()}
01356 LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
01357
01358
01359
01360 if len(symm_1) < len(symm_2):
01361 symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
01362 else:
01363 symm_combinations = list(itertools.product([symm_1[0]], symm_2))
01364
01365 full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
01366 full_mappings = []
01367 for s1, s2 in symm_combinations:
01368
01369 LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
01370 % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
01371 res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
01372
01373 for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
01374 check += 1
01375
01376 mapping = {}
01377 for a, b in asu_mapp.iteritems():
01378 for id_a, id_b in index_mapp.iteritems():
01379 mapping[a[id_a]] = b[id_b]
01380 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
01381 full_mappings.append((chains_rmsd, mapping))
01382 LogVerbose(str(mapping), str(chains_rmsd))
01383
01384 if check != nr_mapp:
01385 LogWarning('Mapping number estimation was wrong')
01386
01387
01388 mapping = min(full_mappings)[1]
01389
01390 LogWarning('Extensive search used for mapping detection (fallback). This ' + \
01391 'approach has known limitations. Check mapping manually: %s' \
01392 % mapping)
01393 return mapping
01394
01395
01396 def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
01397 """
01398 Identify the symmetry (either cyclic C or dihedral D) of the protein and find
01399 all possible symmetry subgroups. This is done testing all combination of chain
01400 superposition and clustering them by the angles (D) or the axis (C) of the Rt
01401 operator.
01402
01403 Groups of superposition which can fully reconstruct the structure are possible
01404 symmetry subgroups.
01405
01406 :param qs_ent: Entity with cached angles and axis.
01407 :type qs_ent: :class:`QSscoreEntity`
01408 :param ent: Entity from which to extract chains (perfect alignment assumed
01409 for superposition as in :attr:`QSscorer.ent_to_cm_1`).
01410 :type ent: :class:`EntityHandle` or :class:`EntityView`
01411 :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
01412 contains all chains belonging to a chem. group (could be
01413 from :attr:`QSscoreEntity.chem_groups` or from "keys()"
01414 or "values()" of :attr:`QSscorer.chem_mapping`)
01415
01416 :return: A list of possible symmetry subgroups (each in same format as
01417 :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
01418 with a single symmetry subgroup with a single group with all chains.
01419 """
01420
01421 angles = {}
01422 axis = {}
01423 for cnames in chem_groups:
01424 for c1, c2 in itertools.combinations(cnames, 2):
01425 cur_angles = qs_ent.GetAngles(c1, c2)
01426 if not np.any(np.isnan(cur_angles)):
01427 angles[(c1,c2)] = cur_angles
01428 cur_axis = qs_ent.GetAxis(c1, c2)
01429 if not np.any(np.isnan(cur_axis)):
01430 axis[(c1,c2)] = cur_axis
01431
01432
01433 symm_groups = []
01434 LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
01435 for angle_thr in np.arange(0.1, 1, 0.1):
01436 d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
01437 if d_symm_groups:
01438 for group in d_symm_groups:
01439 if group not in symm_groups:
01440 symm_groups.append(group)
01441 LogVerbose('Dihedral: %s' % group)
01442 LogInfo('Symmetry threshold %.1f used for angles of %s' \
01443 % (angle_thr, ent.GetName()))
01444 break
01445
01446
01447 for axis_thr in np.arange(0.1, 1, 0.1):
01448 c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
01449 if c_symm_groups:
01450 for group in c_symm_groups:
01451 if group not in symm_groups:
01452 symm_groups.append(group)
01453 LogVerbose('Cyclic: %s' % group)
01454 LogInfo('Symmetry threshold %.1f used for axis of %s' \
01455 % (axis_thr, ent.GetName()))
01456 break
01457
01458
01459 if not symm_groups:
01460 LogInfo('No symmetry-group detected in %s' % ent.GetName())
01461 symm_groups = [[tuple([c for g in chem_groups for c in g])]]
01462
01463 return symm_groups
01464
01465 def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
01466 """
01467 :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
01468 (same return type as there). Empty list if fail.
01469
01470 :param ent: See :func:`_GetSymmetrySubgroups`
01471 :param chem_groups: See :func:`_GetSymmetrySubgroups`
01472 :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
01473 :param angle_thr: Euler angles distance threshold for clustering (float).
01474 """
01475
01476 if len(angles) == 0:
01477
01478 return []
01479 else:
01480 same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
01481
01482
01483 symm_groups = []
01484 for clst in same_angles.values():
01485 group = clst.keys()
01486 if _ValidChainGroup(group, ent):
01487 if len(chem_groups) > 1:
01488
01489 symm_groups.append(zip(*group))
01490 else:
01491
01492 symm_groups.append(group)
01493 symm_groups.append(zip(*group))
01494 return symm_groups
01495
01496 def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
01497 """
01498 :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
01499 (same return type as there). Empty list if fail.
01500
01501 :param ent: See :func:`_GetSymmetrySubgroups`
01502 :param chem_groups: See :func:`_GetSymmetrySubgroups`
01503 :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
01504 :param angle_thr: Axis distance threshold for clustering (float).
01505 """
01506
01507 if len(axis) == 0:
01508
01509 return []
01510 else:
01511 same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
01512
01513
01514 symm_groups = []
01515 for clst in same_axis.values():
01516 all_chain = [item for sublist in clst.keys() for item in sublist]
01517 if len(set(all_chain)) == ent.chain_count:
01518
01519 if len(chem_groups) > 1:
01520 ref_chem_group = chem_groups[0]
01521
01522 cyclic_group_closest = []
01523 cyclic_group_iface = []
01524 for c1 in ref_chem_group:
01525 group_closest = [c1]
01526 group_iface = [c1]
01527 for chains in chem_groups[1:]:
01528
01529 closest = _GetClosestChain(ent, c1, chains)
01530
01531 closest_iface = _GetClosestChainInterface(ent, c1, chains)
01532 group_closest.append(closest)
01533 group_iface.append(closest_iface)
01534 cyclic_group_closest.append(tuple(group_closest))
01535 cyclic_group_iface.append(tuple(group_iface))
01536 if _ValidChainGroup(cyclic_group_closest, ent):
01537 symm_groups.append(cyclic_group_closest)
01538 if _ValidChainGroup(cyclic_group_iface, ent):
01539 symm_groups.append(cyclic_group_iface)
01540
01541 else:
01542 symm_groups.append(chem_groups)
01543 return symm_groups
01544
01545 def _ClusterData(data, thr, metric):
01546 """Wrapper for fclusterdata to get dict of clusters.
01547
01548 :param data: :class:`dict` (keys for ID, values used for clustering)
01549 :return: :class:`dict` {cluster_idx: {data-key: data-value}}
01550 """
01551
01552 if len(data) == 1:
01553 return {0: {data.keys()[0]: data.values()[0]} }
01554
01555 cluster_indices = fclusterdata(np.asarray(data.values()), thr,
01556 method='complete', criterion='distance',
01557 metric=metric)
01558
01559 res = {}
01560 for cluster_idx, data_key in zip(cluster_indices, data.keys()):
01561 if not cluster_idx in res:
01562 res[cluster_idx] = {}
01563 res[cluster_idx][data_key] = data[data_key]
01564 return res
01565
01566 def _AngleArrayDistance(u, v):
01567 """
01568 :return: Average angular distance of two arrays of angles.
01569 :param u: Euler angles.
01570 :param v: Euler angles.
01571 """
01572 dist = []
01573 for a,b in zip(u,v):
01574 delta = abs(a-b)
01575 if delta > np.pi:
01576 delta = abs(2*np.pi - delta)
01577 dist.append(delta)
01578 return np.mean(dist)
01579
01580 def _AxisDistance(u, v):
01581 """
01582 :return: Euclidean distance between two rotation axes. Axes can point in
01583 either direction so we ensure to use the closer one.
01584 :param u: Rotation axis.
01585 :param v: Rotation axis.
01586 """
01587
01588 u = np.asarray(u)
01589 v = np.asarray(v)
01590
01591 dv1 = u - v
01592 dv2 = u + v
01593 d1 = np.dot(dv1, dv1)
01594 d2 = np.dot(dv2, dv2)
01595 return np.sqrt(min(d1, d2))
01596
01597 def _GetClosestChain(ent, ref_chain, chains):
01598 """
01599 :return: Chain closest to *ref_chain* based on center of atoms distance.
01600 :rtype: :class:`str`
01601 :param ent: See :func:`_GetSymmetrySubgroups`
01602 :param ref_chain: We look for chains closest to this one
01603 :type ref_chain: :class:`str`
01604 :param chains: We only consider these chains
01605 :type chains: :class:`list` of :class:`str`
01606 """
01607 contacts = []
01608 ref_center = ent.FindChain(ref_chain).center_of_atoms
01609 for ch in chains:
01610 center = ent.FindChain(ch).center_of_atoms
01611 contacts.append((geom.Distance(ref_center, center), ch))
01612 closest_chain = min(contacts)[1]
01613 return closest_chain
01614
01615 def _GetClosestChainInterface(ent, ref_chain, chains):
01616 """
01617 :return: Chain with biggest interface (within 10 A) to *ref_chain*.
01618 :rtype: :class:`str`
01619 :param ent: See :func:`_GetSymmetrySubgroups`
01620 :param ref_chain: We look for chains closest to this one
01621 :type ref_chain: :class:`str`
01622 :param chains: We only consider these chains
01623 :type chains: :class:`list` of :class:`str`
01624 """
01625
01626
01627 closest = []
01628 for ch in chains:
01629 iface_view = ent.Select('cname=%s and 10 <> [cname=%s]' % (ref_chain, ch))
01630 nr_res = iface_view.residue_count
01631 closest.append((nr_res, ch))
01632 closest_chain = max(closest)[1]
01633 return closest_chain
01634
01635 def _ValidChainGroup(group, ent):
01636 """
01637 :return: True, if *group* has unique chain names and as many chains as *ent*
01638 :rtype: :class:`bool`
01639 :param group: Symmetry groups to check
01640 :type group: Same as :attr:`QSscorer.symm_1`
01641 :param ent: See :func:`_GetSymmetrySubgroups`
01642 """
01643 all_chain = [item for sublist in group for item in sublist]
01644 if len(all_chain) == len(set(all_chain)) and\
01645 len(all_chain) == ent.chain_count:
01646 return True
01647 return False
01648
01649 def _LimitChemMapping(chem_mapping, limit_1, limit_2):
01650 """
01651 :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
01652 :rtype: Same as :attr:`QSscorer.chem_mapping`
01653 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01654 :param limit_1: Limits chain names in chem_mapping.keys()
01655 :type limit_1: List/tuple of strings
01656 :param limit_2: Limits chain names in chem_mapping.values()
01657 :type limit_2: List/tuple of strings
01658 """
01659
01660 limit_1_set = set(limit_1)
01661 limit_2_set = set(limit_2)
01662 asu_chem_mapping = dict()
01663 for key, value in chem_mapping.iteritems():
01664 new_key = tuple(set(key) & limit_1_set)
01665 if new_key:
01666 asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
01667 return asu_chem_mapping
01668
01669
01670 def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
01671 """
01672 :return: Dictionary of number of mappings and superpositions to be performed.
01673 Returned as *result[X][Y] = number* with X = "intra" or "inter" and
01674 Y = "mappings" or "superpositions". The idea is that for each
01675 pairwise superposition we check all possible mappings.
01676 We can check the combinations within (intra) a symmetry group and
01677 once established, we check the combinations between different (inter)
01678 symmetry groups.
01679 :param symm_1: See :attr:`QSscorer.symm_1`
01680 :param symm_2: See :attr:`QSscorer.symm_2`
01681 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01682 """
01683
01684 c = {}
01685 c['intra'] = {}
01686 c['inter'] = {}
01687 c['intra']['mappings'] = 0
01688 c['intra']['superpositions'] = 0
01689 c['inter']['mappings'] = 0
01690 c['inter']['superpositions'] = 0
01691
01692 ref_symm_1 = symm_1[0]
01693 ref_symm_2 = symm_2[0]
01694 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01695 for g1, g2 in asu_chem_mapping.iteritems():
01696 chain_superpositions = len(g1) * len(g2)
01697 c['intra']['superpositions'] += chain_superpositions
01698 map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
01699 c['intra']['mappings'] += chain_superpositions * map_per_sup
01700 if len(symm_1) == 1 or len(symm_2) == 1:
01701 return c
01702
01703 asu_superposition = min(len(symm_1), len(symm_2))
01704 c['inter']['superpositions'] = asu_superposition
01705 asu = {tuple(symm_1): tuple(symm_2)}
01706 map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
01707 c['inter']['mappings'] = asu_superposition * map_per_sup
01708 return c
01709
01710 def _PermutationOrCombinations(sup1, sup2, chem_mapping):
01711 """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
01712
01713 chem_map = {}
01714 for a,b in chem_mapping.iteritems():
01715 new_a = tuple([x for x in a if x != sup1])
01716 new_b = tuple([x for x in b if x != sup2])
01717 chem_map[new_a] = new_b
01718 mapp_nr = 1.0
01719 for a, b in chem_map.iteritems():
01720 if len(a) == len(b):
01721 mapp_nr *= factorial(len(a))
01722 elif len(a) < len(b):
01723 mapp_nr *= binom(len(b), len(a))
01724 elif len(a) > len(b):
01725 mapp_nr *= binom(len(a), len(b))
01726 return int(mapp_nr)
01727
01728 def _ListPossibleMappings(sup1, sup2, chem_mapping):
01729 """
01730 Return a flat list of all possible mappings given *chem_mapping* and keeping
01731 mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
01732 this is all the mappings to check for a given superposition.
01733
01734 Elements in first complex are defined by *chem_mapping.keys()* (list of list
01735 of elements) and elements in second complex by *chem_mapping.values()*. If
01736 complexes don't have same number of elements, we map only elements for the one
01737 with less. Also mapping is only between elements of mapped groups according to
01738 *chem_mapping*.
01739
01740 :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
01741 value = element in chem_mapping-value)
01742 :param sup1: Element for which mapping is fixed.
01743 :type sup1: Like element in chem_mapping-key
01744 :param sup2: Element for which mapping is fixed.
01745 :type sup2: Like element in chem_mapping-value
01746 :param chem_mapping: Defines mapping between groups of elements (e.g. result
01747 of :func:`_LimitChemMapping`).
01748 :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
01749
01750 :raises: :class:`QSscoreError` if reference complex (first one or one with
01751 less elements) has more elements for any given mapped group.
01752 """
01753
01754 chain1 = [i for s in chem_mapping.keys() for i in s]
01755 chain2 = [i for s in chem_mapping.values() for i in s]
01756 swap = False
01757 if len(chain1) > len(chain2):
01758 swap = True
01759
01760 chem_map = {}
01761 for a, b in chem_mapping.iteritems():
01762 new_a = tuple([x for x in a if x != sup1])
01763 new_b = tuple([x for x in b if x != sup2])
01764 if swap:
01765 chem_map[new_b] = new_a
01766 else:
01767 chem_map[new_a] = new_b
01768
01769
01770 chem_perm = []
01771 chem_ref = []
01772 for a, b in chem_map.iteritems():
01773 if len(a) == len(b):
01774 chem_perm.append(list(itertools.permutations(b)))
01775 chem_ref.append(a)
01776 elif len(a) < len(b):
01777 chem_perm.append(list(itertools.combinations(b, len(a))))
01778 chem_ref.append(a)
01779 else:
01780 raise QSscoreError('Impossible to define reference group: %s' \
01781 % str(chem_map))
01782
01783 mappings = []
01784 flat_ref = [i for s in chem_ref for i in s]
01785 for perm in itertools.product(*chem_perm):
01786 flat_perm = [i for s in perm for i in s]
01787 d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
01788 if swap:
01789 d = {v: k for k, v in d.items()}
01790 d.update({sup1: sup2})
01791 mappings.append(d)
01792 return mappings
01793
01794
01795 def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
01796 sup_thr=4, sup_fract=0.8, find_best=True):
01797 """
01798 Quick check if we can superpose two chains and get a mapping for all other
01799 chains using the same transformation. The mapping is defined by sufficient
01800 overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
01801
01802 :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
01803 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
01804 Views are ok but only to select full chains!
01805 :param ent_2: Entity to map to (perfect alignment assumed between
01806 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
01807 Views are ok but only to select full chains!
01808 :param symm_1: Symmetry groups to use. We only superpose chains within
01809 reference symmetry group of *symm_1* and *symm_2*.
01810 See :attr:`QSscorer.symm_1`
01811 :param symm_2: See :attr:`QSscorer.symm_2`
01812 :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
01813 All chains in *ent_1* / *ent_2* must be listed here!
01814 :param sup_thr: Distance around transformed chain in *ent_1* to check for
01815 overlap.
01816 :type sup_thr: :class:`float`
01817 :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
01818 overlapped for overlap to be sufficient.
01819 :type sup_fract: :class:`float`
01820 :param find_best: If True, we look for best mapping according to
01821 :func:`_ChainRMSD`. Otherwise, we return first suitable
01822 mapping.
01823 :type find_best: :class:`bool`
01824
01825 :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
01826 found, is as in :attr:`QSscorer.chain_mapping`.
01827 :rtype: :class:`dict` (:class:`str` / :class:`str`)
01828 """
01829
01830 ref_symm_1 = symm_1[0]
01831 ref_symm_2 = symm_2[0]
01832 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01833
01834 rmsd_mappings = []
01835 for g1, g2 in asu_chem_mapping.iteritems():
01836
01837
01838
01839 for c1, c2 in itertools.product(g1, g2):
01840
01841 chain_1 = ent_1.Select('cname=%s' % c1)
01842 chain_2 = ent_2.Select('cname=%s' % c2)
01843 res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
01844
01845 mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
01846 res.transformation, sup_thr,
01847 sup_fract)
01848 if not mapping:
01849 continue
01850
01851 if not find_best:
01852 return mapping
01853 else:
01854
01855 rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
01856 rmsd_mappings.append((rmsd, mapping))
01857
01858 if rmsd_mappings:
01859 return min(rmsd_mappings)[1]
01860 else:
01861 return None
01862
01863 def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
01864 sup_thr, sup_fract):
01865 """
01866 :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
01867 (see :func:`_CheckClosedSymmetry`).
01868 :param ent_1: See :func:`_CheckClosedSymmetry`
01869 :param ent_2: See :func:`_CheckClosedSymmetry`
01870 :param chem_mapping: See :func:`_CheckClosedSymmetry`
01871 :param transformation: Superposition transformation to be applied to *ent_1*.
01872 :param sup_thr: See :func:`_CheckClosedSymmetry`
01873 :param sup_fract: See :func:`_CheckClosedSymmetry`
01874 """
01875
01876
01877
01878
01879
01880 if ent_1.chain_count > ent_2.chain_count:
01881 swap = True
01882 ent_1, ent_2 = ent_2, ent_1
01883 transformation = geom.Invert(transformation)
01884 chem_pairs = zip(chem_mapping.values(), chem_mapping.keys())
01885 else:
01886 swap = False
01887 chem_pairs = chem_mapping.iteritems()
01888
01889 if ent_1.chain_count == 0:
01890 return None
01891
01892 chem_partners = dict()
01893 for cg1, cg2 in chem_pairs:
01894 for ch in cg1:
01895 chem_partners[ch] = set(cg2)
01896
01897 mapping = dict()
01898 mapped_chains = set()
01899 for ch_1 in ent_1.chains:
01900
01901 ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
01902 for a_1 in ch_1.handle.atoms:
01903 transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
01904 a_within = ent_2.FindWithin(transformed_pos, sup_thr)
01905 for a_2 in a_within:
01906 ch_atoms[a_2.chain.name].add(a_2.hash_code)
01907
01908 max_num, max_name = max((len(atoms), name)
01909 for name, atoms in ch_atoms.iteritems())
01910
01911 ch_2 = ent_2.FindChain(max_name)
01912 if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
01913 return None
01914
01915 ch_1_name = ch_1.name
01916 if ch_1_name not in chem_partners:
01917 raise RuntimeError("Chem. mapping doesn't contain all chains!")
01918 if max_name not in chem_partners[ch_1_name]:
01919 return None
01920
01921 if max_name in mapped_chains:
01922 return None
01923
01924 mapping[ch_1_name] = max_name
01925 mapped_chains.add(max_name)
01926
01927 if swap:
01928 mapping = {v: k for k, v in mapping.iteritems()}
01929 return mapping
01930
01931 def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
01932 """
01933 :return: RMSD between complexes considering chain mapping.
01934 :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
01935 mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
01936 :param ent_2: Entity which was mapped to (perfect alignment assumed between
01937 mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
01938 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
01939 :param transformation: Superposition transformation to be applied to *ent_1*.
01940 """
01941
01942 rmsds = []
01943 atoms = []
01944 for c1, c2 in chain_mapping.iteritems():
01945
01946 chain_1 = ent_1.Select('cname=%s' % c1)
01947 chain_2 = ent_2.Select('cname=%s' % c2)
01948 atom_count = chain_1.atom_count
01949 if atom_count != chain_2.atom_count:
01950 raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
01951
01952 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
01953
01954 rmsds.append(rmsd)
01955 atoms.append(float(atom_count))
01956
01957 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
01958 return rmsd
01959
01960 class _CachedRMSD:
01961 """Helper object for repetitive RMSD calculations.
01962 Meant to speed up :func:`_GetChainMapping` but could also be used to replace
01963 :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
01964
01965 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
01966 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
01967 """
01968 def __init__(self, ent_1, ent_2):
01969
01970 self.ent_1 = ent_1
01971 self.ent_2 = ent_2
01972 self._chain_views_1 = dict()
01973 self._chain_views_2 = dict()
01974 self._pair_rmsd = dict()
01975
01976 def GetChainView1(self, cname):
01977 """Get cached view on chain *cname* for :attr:`ent_1`."""
01978 if cname not in self._chain_views_1:
01979 self._chain_views_1[cname] = self.ent_1.Select('cname=%s' % cname)
01980 return self._chain_views_1[cname]
01981
01982 def GetChainView2(self, cname):
01983 """Get cached view on chain *cname* for :attr:`ent_2`."""
01984 if cname not in self._chain_views_2:
01985 self._chain_views_2[cname] = self.ent_2.Select('cname=%s' % cname)
01986 return self._chain_views_2[cname]
01987
01988 def GetSuperposition(self, c1, c2):
01989 """Get superposition result (no change in entities!) for *c1* to *c2*.
01990 This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
01991
01992 :param c1: chain name for :attr:`ent_1`.
01993 :param c2: chain name for :attr:`ent_2`.
01994 """
01995
01996 self._pair_rmsd = dict()
01997
01998 chain_1 = self.GetChainView1(c1)
01999 chain_2 = self.GetChainView2(c2)
02000 if chain_1.atom_count != chain_2.atom_count:
02001 raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
02002 return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
02003
02004 def GetMappedRMSD(self, chain_mapping, transformation):
02005 """
02006 :return: RMSD between complexes considering chain mapping.
02007 :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
02008 :param transformation: Superposition transformation (e.g. res.transformation
02009 for res = :func:`GetSuperposition`).
02010 """
02011
02012 rmsds = []
02013 atoms = []
02014 for c1, c2 in chain_mapping.iteritems():
02015
02016 if (c1, c2) in self._pair_rmsd:
02017 atom_count, rmsd = self._pair_rmsd[(c1, c2)]
02018 else:
02019
02020 chain_1 = self.GetChainView1(c1)
02021 chain_2 = self.GetChainView2(c2)
02022 atom_count = chain_1.atom_count
02023 if atom_count != chain_2.atom_count:
02024 raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
02025
02026 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
02027 self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
02028
02029 rmsds.append(rmsd)
02030 atoms.append(float(atom_count))
02031
02032 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
02033 return rmsd
02034
02035
02036 def _CleanUserSymmetry(symm, ent):
02037 """Clean-up user provided symmetry.
02038
02039 :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
02040 :param ent: Entity from which to extract chain names
02041 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
02042 :return: New symmetry group limited to chains in sub-structure *ent*
02043 (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
02044 """
02045
02046 chain_names = set([ch.name for ch in ent.chains])
02047 new_symm = list()
02048 for symm_group in symm:
02049 new_group = tuple(ch for ch in symm_group if ch in chain_names)
02050 if new_group:
02051 new_symm.append(new_group)
02052
02053 if new_symm != symm:
02054 LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
02055
02056 lengths = [len(symm_group) for symm_group in new_symm]
02057 if lengths[1:] != lengths[:-1]:
02058 LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
02059 % (ent.GetName()))
02060 return []
02061
02062 s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
02063 if len(s_chain_names) != sum(lengths):
02064 LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
02065 % (ent.GetName()))
02066 return []
02067 if s_chain_names != chain_names:
02068 LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
02069 % (ent.GetName()))
02070 return []
02071
02072 return new_symm
02073
02074 def _AreValidSymmetries(symm_1, symm_2):
02075 """Check symmetry pair for major problems.
02076
02077 :return: False if any of the two is empty or if they're compatible in size.
02078 :rtype: :class:`bool`
02079 """
02080 if not symm_1 or not symm_2:
02081 return False
02082 if len(symm_1) != 1 or len(symm_2) != 1:
02083 if not len(symm_1[0]) == len(symm_2[0]):
02084 LogWarning('Symmetry groups of different sizes. Ignoring them!')
02085 return False
02086 return True
02087
02088 def _GetMappedAlignments(ent_1, ent_2, chain_mapping):
02089 """
02090 :return: Alignments of 2 structures given chain mapping
02091 (see :attr:`QSscorer.alignments`).
02092 :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
02093 Views to this entity attached to first sequence of each aln.
02094 :param ent_2: Entity containing all chains in *chain_mapping.values()*.
02095 Views to this entity attached to second sequence of each aln.
02096 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
02097 """
02098 alns = []
02099 for ch_1_name in sorted(chain_mapping):
02100
02101 ch_1 = ent_1.FindChain(ch_1_name)
02102 seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
02103 ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
02104 seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
02105
02106 aln = _AlignAtomSeqs(seq_1, seq_2)
02107 if aln: alns.append(aln)
02108 return alns
02109
02110 def _GetMappedResidues(alns):
02111 """
02112 :return: Mapping of shared residues in *alns* (with views attached)
02113 (see :attr:`QSscorer.mapped_residues`).
02114 :param alns: See :attr:`QSscorer.alignments`
02115 """
02116 mapped_residues = dict()
02117 for aln in alns:
02118
02119 c1 = aln.GetSequence(0).name
02120 mapped_residues[c1] = dict()
02121
02122 v1, v2 = seq.ViewsFromAlignment(aln)
02123 for res_1, res_2 in zip(v1.residues, v2.residues):
02124 r1 = res_1.number.num
02125 r2 = res_2.number.num
02126 mapped_residues[c1][r1] = r2
02127
02128 return mapped_residues
02129
02130 def _GetExtraWeights(contacts, done, mapped_residues):
02131 """Return sum of extra weights for contacts of chains in set and not in done.
02132 :return: Tuple (weight_extra_mapped, weight_extra_all).
02133 weight_extra_mapped only sums if both cX,rX in mapped_residues
02134 weight_extra_all sums all
02135 :param contacts: See :func:`GetContacts` for first entity
02136 :param done: List of (c1, c2, r1, r2) tuples to ignore
02137 :param mapped_residues: See :func:`_GetMappedResidues`
02138 """
02139 weight_extra_mapped = 0
02140 weight_extra_non_mapped = 0
02141 for c1 in contacts:
02142 for c2 in contacts[c1]:
02143 for r1 in contacts[c1][c2]:
02144 for r2 in contacts[c1][c2][r1]:
02145 if (c1, c2, r1, r2) not in done:
02146 weight = _weight(contacts[c1][c2][r1][r2])
02147 if c1 in mapped_residues and r1 in mapped_residues[c1] \
02148 and c2 in mapped_residues and r2 in mapped_residues[c2]:
02149 weight_extra_mapped += weight
02150 else:
02151 weight_extra_non_mapped += weight
02152 return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
02153
02154 def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
02155 """Get QS scores (see :class:`QSscorer`).
02156
02157 Note that if some chains are not to be considered at all, they must be removed
02158 from *contacts_1* / *contacts_2* prior to calling this.
02159
02160 :param contacts_1: See :func:`GetContacts` for first entity
02161 :param contacts_2: See :func:`GetContacts` for second entity
02162 :param mapped_residues: See :func:`_GetMappedResidues`
02163 :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
02164 for *contacts_2*.
02165 :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
02166 :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
02167 see :attr:`QSscorer.global_score`)
02168 """
02169
02170 done_1 = set()
02171 done_2 = set()
02172 weighted_scores = 0
02173 weight_sum = 0
02174
02175
02176 for c11 in contacts_1:
02177
02178 if c11 not in mapped_residues: continue
02179 c1T = chain_mapping[c11]
02180
02181 for c21 in contacts_1[c11]:
02182
02183 if c21 not in mapped_residues: continue
02184 c2T = chain_mapping[c21]
02185
02186 flipped_chains = (c1T > c2T)
02187 if flipped_chains:
02188 c12, c22 = c2T, c1T
02189 else:
02190 c12, c22 = c1T, c2T
02191
02192 if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
02193
02194 for r11 in contacts_1[c11][c21]:
02195
02196 if r11 not in mapped_residues[c11]: continue
02197 r1T = mapped_residues[c11][r11]
02198
02199 for r21 in contacts_1[c11][c21][r11]:
02200
02201 if r21 not in mapped_residues[c21]: continue
02202 r2T = mapped_residues[c21][r21]
02203
02204 if flipped_chains:
02205 r12, r22 = r2T, r1T
02206 else:
02207 r12, r22 = r1T, r2T
02208
02209 if r12 not in contacts_2[c12][c22]: continue
02210 if r22 not in contacts_2[c12][c22][r12]: continue
02211
02212 d1 = contacts_1[c11][c21][r11][r21]
02213 d2 = contacts_2[c12][c22][r12][r22]
02214 weight = _weight(min(d1, d2))
02215 weighted_scores += weight * (1 - abs(d1 - d2) / 12)
02216 weight_sum += weight
02217
02218 done_1.add((c11, c21, r11, r21))
02219 done_2.add((c12, c22, r12, r22))
02220
02221 LogVerbose("Shared contacts: %d" % len(done_1))
02222
02223
02224 weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
02225 mapped_residues_2 = dict()
02226 for c1 in mapped_residues:
02227 c2 = chain_mapping[c1]
02228 mapped_residues_2[c2] = set()
02229 for r1 in mapped_residues[c1]:
02230 mapped_residues_2[c2].add(mapped_residues[c1][r1])
02231 weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
02232 weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
02233 weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
02234
02235
02236 denom_best = weight_sum + weight_extra_mapped
02237 denom_all = weight_sum + weight_extra_all
02238 if denom_best == 0:
02239 QS_best = 0
02240 else:
02241 QS_best = weighted_scores / denom_best
02242 if denom_all == 0:
02243 QS_global = 0
02244 else:
02245 QS_global = weighted_scores / denom_all
02246 return QS_best, QS_global
02247
02248 def _weight(dist):
02249 """
02250 This weight expresses the probability of two residues to interact given the CB
02251 distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
02252 """
02253 if dist <= 5.0:
02254 return 1
02255 x = (dist-5.0)/4.28;
02256 return np.exp(-2*x*x)
02257
02258
02259 def _GetQsSuperposition(alns):
02260 """
02261 :return: Superposition result based on shared CA atoms in *alns*
02262 (with views attached) (see :attr:`QSscorer.superposition`).
02263 :param alns: See :attr:`QSscorer.alignments`
02264 """
02265
02266 if not alns:
02267 raise QSscoreError('No successful alignments for superposition!')
02268
02269 view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
02270 view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
02271
02272 for aln in alns:
02273 for col in aln:
02274 res_1 = col.GetResidue(0)
02275 res_2 = col.GetResidue(1)
02276 if res_1.IsValid() and res_2.IsValid():
02277 ca_1 = res_1.FindAtom('CA')
02278 ca_2 = res_2.FindAtom('CA')
02279 if ca_1.IsValid() and ca_2.IsValid():
02280 view_1.AddAtom(ca_1)
02281 view_2.AddAtom(ca_2)
02282
02283 res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
02284 return res
02285
02286
02287 def _AddResidue(edi, res, rnum, chain, calpha_only):
02288 """
02289 Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
02290 Either all atoms added or (if *calpha_only*) only CA.
02291 """
02292 if calpha_only:
02293 ca_atom = res.FindAtom('CA')
02294 if ca_atom.IsValid():
02295 new_res = edi.AppendResidue(chain, res.name, rnum)
02296 edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
02297 else:
02298 new_res = edi.AppendResidue(chain, res.name, rnum)
02299 for atom in res.atoms:
02300 edi.InsertAtom(new_res, atom.name, atom.pos)
02301
02302 def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only):
02303 """
02304 Create two new entities (based on the alignments attached views) where all
02305 residues have same numbering (when they're aligned) and they are all pushed to
02306 a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
02307 but not contained in *alns*.
02308
02309 Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
02310
02311 :param alns: List of alignments with attached views (first sequence: *ent_1*,
02312 second: *ent_2*). Residue number in single chain is column index
02313 of current alignment + sum of lengths of all previous alignments
02314 (order of alignments as in input list).
02315 :type alns: See :attr:`QSscorer.alignments`
02316 :param ent_1: First entity to process.
02317 :type ent_1: :class:`~ost.mol.EntityHandle`
02318 :param ent_2: Second entity to process.
02319 :type ent_2: :class:`~ost.mol.EntityHandle`
02320 :param calpha_only: If True, we only include CA atoms instead of all.
02321 :type calpha_only: :class:`bool`
02322
02323 :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
02324 :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
02325 """
02326
02327 ent_ren_1 = mol.CreateEntity()
02328 ed_1 = ent_ren_1.EditXCS()
02329 new_chain_1 = ed_1.InsertChain('X')
02330
02331 ent_ren_2 = mol.CreateEntity()
02332 ed_2 = ent_ren_2.EditXCS()
02333 new_chain_2 = ed_2.InsertChain('X')
02334
02335 rnum = 0
02336 chain_done_1 = set()
02337 chain_done_2 = set()
02338 for aln in alns:
02339 chain_done_1.add(aln.GetSequence(0).name)
02340 chain_done_2.add(aln.GetSequence(1).name)
02341 for col in aln:
02342 rnum += 1
02343
02344 res_1 = col.GetResidue(0)
02345 if res_1.IsValid():
02346 _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
02347 res_2 = col.GetResidue(1)
02348 if res_2.IsValid():
02349 _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
02350
02351 for chain in ent_1.chains:
02352 if chain.name in chain_done_1:
02353 continue
02354 for res in chain.residues:
02355 rnum += 1
02356 _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
02357 for chain in ent_2.chains:
02358 if chain.name in chain_done_2:
02359 continue
02360 for res in chain.residues:
02361 rnum += 1
02362 _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
02363
02364 ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
02365 ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
02366
02367 if not conop.GetDefaultLib():
02368 raise RuntimeError("QSscore computation requires a compound library!")
02369 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
02370 pr.Process(ent_ren_1)
02371 ed_1.UpdateICS()
02372 pr.Process(ent_ren_2)
02373 ed_2.UpdateICS()
02374 return ent_ren_1, ent_ren_2
02375
02376 def _ComputeLDDTScore(ref, mdl):
02377 """
02378 :return: lDDT of *mdl* vs *ref* (see :attr:`QSscorer.lddt_score`).
02379 :param mdl: Reference entity (see :attr:`QSscorer.lddt_mdl`)
02380 :param ref: Model entity (see :attr:`QSscorer.lddt_ref`)
02381 """
02382
02383 LogInfo('Reference %s has: %s residues' % (ref.GetName(), ref.residue_count))
02384 LogInfo('Model %s has: %s residues' % (mdl.GetName(), mdl.residue_count))
02385
02386 lddt_score = mol.alg.LocalDistDiffTest(mdl.Select(''), ref.Select(''),
02387 2., 8., 'lddt')
02388 LogInfo('lDDT score: %.3f' % lddt_score)
02389
02390 for r in mdl.residues:
02391 if r.HasProp('lddt'):
02392 for a in r.atoms:
02393 a.SetBFactor(r.GetFloatProp('lddt'))
02394 else:
02395 for a in r.atoms:
02396 a.SetBFactor(0.0)
02397
02398 return lddt_score
02399
02400
02401 __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
02402 'GetContacts')