00001 """
00002 Scoring of quaternary structures as in Martino's 2017 paper.
00003
00004 .. note ::
00005
00006 Requirements for use:
00007
00008 - A default :class:`compound library <ost.conop.CompoundLib>` must be defined
00009 and accessible via :func:`~ost.conop.GetDefaultLib`. This is set by default
00010 when executing scripts with ``ost``. Otherwise, you must set this with
00011 :func:`~ost.conop.SetDefaultLib`.
00012 - ClustalW must be installed (unless you provide chain mappings)
00013 - Python modules `numpy` and `scipy` must be installed and available
00014 (e.g. use ``pip install scipy numpy``)
00015
00016 Authors: Gerardo Tauriello, Martino Bertoni
00017 """
00018
00019 from ost import mol, geom, conop, seq, settings
00020 from ost import LogError, LogWarning, LogScript, LogInfo, LogVerbose, LogDebug
00021 from ost.bindings.clustalw import ClustalW
00022 import numpy as np
00023 from scipy.misc import factorial
00024 from scipy.special import binom
00025 from scipy.cluster.hierarchy import fclusterdata
00026 import itertools
00027
00028
00029
00030
00031
00032 class QSscoreError(Exception):
00033 """Exception to be raised for "acceptable" exceptions in QS scoring.
00034
00035 Those are cases we might want to capture for default behavior.
00036 """
00037 pass
00038
00039 class QSscorer:
00040 """Object to compute QS scores.
00041
00042 Simple usage without any precomputed contacts, symmetries and mappings:
00043
00044 .. code-block:: python
00045
00046 import ost
00047 from ost.mol.alg import qsscoring
00048
00049 # load two biounits to compare
00050 ent_full = ost.io.LoadPDB('3ia3', remote=True)
00051 ent_1 = ent_full.Select('cname=A,D')
00052 ent_2 = ent_full.Select('cname=B,C')
00053 # get score
00054 ost.PushVerbosityLevel(3)
00055 try:
00056 qs_scorer = qsscoring.QSscorer(ent_1, ent_2)
00057 ost.LogScript('QSscore:', str(qs_scorer.global_score))
00058 ost.LogScript('Chain mapping used:', str(qs_scorer.chain_mapping))
00059 # commonly you want the QS global score as output
00060 qs_score = qs_scorer.global_score
00061 except qsscoring.QSscoreError as ex:
00062 # default handling: report failure and set score to 0
00063 ost.LogError('QSscore failed:', str(ex))
00064 qs_score = 0
00065
00066 For maximal performance when computing QS scores of the same entity with many
00067 others, it is advisable to construct and reuse :class:`QSscoreEntity` objects.
00068
00069 Any known / precomputed information can be filled into the appropriate
00070 attribute here (no checks done!). Otherwise most quantities are computed on
00071 first access and cached (lazy evaluation). Setters are provided to set values
00072 with extra checks (e.g. :func:`SetSymmetries`).
00073
00074 All necessary seq. alignments are done by global BLOSUM62-based alignment. A
00075 multiple sequence alignment is performed with ClustalW unless
00076 :attr:`chain_mapping` is provided manually. You will need to have an
00077 executable ``clustalw`` or ``clustalw2`` in your ``PATH`` or you must set
00078 :attr:`clustalw_bin` accordingly. Otherwise an exception
00079 (:class:`ost.settings.FileNotFound`) is thrown.
00080
00081 Formulas for QS scores:
00082
00083 ::
00084
00085 - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
00086 - QS_global = weighted_scores / (weight_sum + weight_extra_all)
00087 -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
00088 -> weight_sum = sum(w(min(d1,d2))) for shared
00089 -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
00090 -> weight_extra_all = sum(w(d)) for all non-shared
00091 -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
00092
00093 :param ent_1: First structure to be scored.
00094 :type ent_1: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
00095 :class:`~ost.mol.EntityView`
00096 :param ent_2: Second structure to be scored.
00097 :type ent_2: :class:`QSscoreEntity`, :class:`~ost.mol.EntityHandle` or
00098 :class:`~ost.mol.EntityView`
00099
00100 :raises: :class:`QSscoreError` if input structures are invalid or are monomers
00101 or have issues that make it impossible for a QS score to be computed.
00102
00103 .. attribute:: qs_ent_1
00104
00105 :class:`QSscoreEntity` object for *ent_1* given at construction.
00106 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
00107 set it to 'pdb_1' using :func:`~QSscoreEntity.SetName`.
00108
00109 .. attribute:: qs_ent_2
00110
00111 :class:`QSscoreEntity` object for *ent_2* given at construction.
00112 If entity names (:attr:`~QSscoreEntity.original_name`) are not unique, we
00113 set it to 'pdb_2' using :func:`~QSscoreEntity.SetName`.
00114
00115 .. attribute:: calpha_only
00116
00117 True if any of the two structures is CA-only (after cleanup).
00118
00119 :type: :class:`bool`
00120
00121 .. attribute:: max_ca_per_chain_for_cm
00122
00123 Maximal number of CA atoms to use in each chain to determine chain mappings.
00124 Setting this to -1 disables the limit. Limiting it speeds up determination
00125 of symmetries and chain mappings. By default it is set to 100.
00126
00127 :type: :class:`int`
00128 """
00129 def __init__(self, ent_1, ent_2):
00130
00131 if isinstance(ent_1, QSscoreEntity):
00132 self.qs_ent_1 = ent_1
00133 else:
00134 self.qs_ent_1 = QSscoreEntity(ent_1)
00135 if isinstance(ent_2, QSscoreEntity):
00136 self.qs_ent_2 = ent_2
00137 else:
00138 self.qs_ent_2 = QSscoreEntity(ent_2)
00139
00140 if not self.qs_ent_1.is_valid or not self.qs_ent_2.is_valid:
00141 raise QSscoreError("Invalid input in QSscorer!")
00142
00143 if self.qs_ent_1.original_name == self.qs_ent_2.original_name:
00144 self.qs_ent_1.SetName('pdb_1')
00145 self.qs_ent_2.SetName('pdb_2')
00146 else:
00147 self.qs_ent_1.SetName(self.qs_ent_1.original_name)
00148 self.qs_ent_2.SetName(self.qs_ent_2.original_name)
00149
00150 self.calpha_only = self.qs_ent_1.calpha_only or self.qs_ent_2.calpha_only
00151 self.max_ca_per_chain_for_cm = 100
00152
00153 self._chem_mapping = None
00154 self._ent_to_cm_1 = None
00155 self._ent_to_cm_2 = None
00156 self._symm_1 = None
00157 self._symm_2 = None
00158 self._chain_mapping = None
00159 self._alignments = None
00160 self._mapped_residues = None
00161 self._global_score = None
00162 self._best_score = None
00163 self._superposition = None
00164 self._lddt_score = None
00165 self._lddt_mdl = None
00166 self._lddt_ref = None
00167 self._clustalw_bin = None
00168
00169 @property
00170 def chem_mapping(self):
00171 """Inter-complex mapping of chemical groups.
00172
00173 Each group (see :attr:`QSscoreEntity.chem_groups`) is mapped according to
00174 highest sequence identity. Alignment is between longest sequences in groups.
00175
00176 Limitations:
00177
00178 - If different numbers of groups, we map only the groups for the complex
00179 with less groups (rest considered unmapped and shown as warning)
00180 - The mapping is forced: the "best" mapping will be chosen independently of
00181 how low the seq. identity may be
00182
00183 :getter: Computed on first use (cached)
00184 :type: :class:`dict` with key = :class:`tuple` of chain names in
00185 :attr:`qs_ent_1` and value = :class:`tuple` of chain names in
00186 :attr:`qs_ent_2`.
00187
00188 :raises: :class:`QSscoreError` if we end up having less than 2 chains for
00189 either entity in the mapping (can happen if chains do not have CA
00190 atoms).
00191 """
00192 if self._chem_mapping is None:
00193 self._chem_mapping = _GetChemGroupsMapping(self.qs_ent_1, self.qs_ent_2)
00194 return self._chem_mapping
00195
00196 @property
00197 def ent_to_cm_1(self):
00198 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries.
00199
00200 Properties:
00201
00202 - Includes only residues aligned according to :attr:`chem_mapping`
00203 - Includes only 1 CA atom per residue
00204 - Has at least 5 and at most :attr:`max_ca_per_chain_for_cm` atoms per chain
00205 - All chains of the same chemical group have the same number of atoms
00206 (also in :attr:`ent_to_cm_2` according to :attr:`chem_mapping`)
00207 - All chains appearing in :attr:`chem_mapping` appear in this entity
00208 (so the two can be safely used together)
00209
00210 This entity might be transformed (i.e. all positions rotated/translated by
00211 same transformation matrix) if this can speed up computations. So do not
00212 assume fixed global positions (but relative distances will remain fixed).
00213
00214 :getter: Computed on first use (cached)
00215 :type: :class:`~ost.mol.EntityHandle`
00216
00217 :raises: :class:`QSscoreError` if any chain ends up having less than 5 res.
00218 """
00219 if self._ent_to_cm_1 is None:
00220 self._ComputeAlignedEntities()
00221 return self._ent_to_cm_1
00222
00223 @property
00224 def ent_to_cm_2(self):
00225 """Subset of :attr:`qs_ent_1` used to compute chain mapping and symmetries
00226 (see :attr:`ent_to_cm_1` for details).
00227 """
00228 if self._ent_to_cm_2 is None:
00229 self._ComputeAlignedEntities()
00230 return self._ent_to_cm_2
00231
00232 @property
00233 def symm_1(self):
00234 """Symmetry groups for :attr:`qs_ent_1` used to speed up chain mapping.
00235
00236 This is a list of chain-lists where each chain-list can be used reconstruct
00237 the others via cyclic C or dihedral D symmetry. The first chain-list is used
00238 as a representative symmetry group. For heteromers, the group-members must
00239 contain all different seqres in oligomer.
00240
00241 Example: symm. groups [(A,B,C), (D,E,F), (G,H,I)] means that there are
00242 symmetry transformations to get (D,E,F) and (G,H,I) from (A,B,C).
00243
00244 Properties:
00245
00246 - All symmetry group tuples have the same length (num. of chains)
00247 - All chains in :attr:`ent_to_cm_1` appear (w/o duplicates)
00248 - For heteros: symmetry group tuples have all different chem. groups
00249 - Trivial symmetry group = one tuple with all chains (used if inconsistent
00250 data provided or if no symmetry is found)
00251 - Either compatible to :attr:`symm_2` or trivial symmetry groups used.
00252 Compatibility requires same lengths of symmetry group tuples and it must
00253 be possible to get an overlap (80% of residues covered within 6 A of a
00254 (chem. mapped) chain) of all chains in representative symmetry groups by
00255 superposing one pair of chains.
00256
00257 :getter: Computed on first use (cached)
00258 :type: :class:`list` of :class:`tuple` of :class:`str` (chain names)
00259 """
00260 if self._symm_1 is None:
00261 self._ComputeSymmetry()
00262 return self._symm_1
00263
00264 @property
00265 def symm_2(self):
00266 """Symmetry groups for :attr:`qs_ent_2` (see :attr:`symm_1` for details)."""
00267 if self._symm_2 is None:
00268 self._ComputeSymmetry()
00269 return self._symm_2
00270
00271 def SetSymmetries(self, symm_1, symm_2):
00272 """Set user-provided symmetry groups.
00273
00274 These groups are restricted to chain names appearing in :attr:`ent_to_cm_1`
00275 and :attr:`ent_to_cm_2` respectively. They are only valid if they cover all
00276 chains and both *symm_1* and *symm_2* have same lengths of symmetry group
00277 tuples. Otherwise trivial symmetry group used (see :attr:`symm_1`).
00278
00279 :param symm_1: Value to set for :attr:`symm_1`.
00280 :param symm_2: Value to set for :attr:`symm_2`.
00281 """
00282
00283 self._symm_1 = _CleanUserSymmetry(symm_1, self.ent_to_cm_1)
00284 self._symm_2 = _CleanUserSymmetry(symm_2, self.ent_to_cm_2)
00285
00286 if not _AreValidSymmetries(self._symm_1, self._symm_2):
00287 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
00288 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
00289
00290 @property
00291 def chain_mapping(self):
00292 """Mapping from :attr:`ent_to_cm_1` to :attr:`ent_to_cm_2`.
00293
00294 Properties:
00295
00296 - Mapping is between chains of same chem. group (see :attr:`chem_mapping`)
00297 - Each chain can appear only once in mapping
00298 - All chains of complex with less chains are mapped
00299 - Symmetry (:attr:`symm_1`, :attr:`symm_2`) is taken into account
00300
00301 Details on algorithms used to find mapping:
00302
00303 - We try all pairs of chem. mapped chains within symmetry group and get
00304 superpose-transformation for them
00305 - First option: check for "sufficient overlap" of other chain-pairs
00306
00307 - For each chain-pair defined above: apply superposition to full oligomer
00308 and map chains based on structural overlap
00309 - Structural overlap = X% of residues in second oligomer covered within Y
00310 Angstrom of a (chem. mapped) chain in first oligomer. We successively
00311 try (X,Y) = (80,4), (40,6) and (20,8) to be less and less strict in
00312 mapping (warning shown for most permissive one).
00313 - If multiple possible mappings are found, we choose the one which leads
00314 to the lowest multi-chain-RMSD given the superposition
00315
00316 - Fallback option: try all mappings to find minimal multi-chain-RMSD
00317 (warning shown)
00318
00319 - For each chain-pair defined above: apply superposition, try all (!)
00320 possible chain mappings (within symmetry group) and keep mapping with
00321 lowest multi-chain-RMSD
00322 - Repeat procedure above to resolve symmetry. Within the symmetry group we
00323 can use the chain mapping computed before and we just need to find which
00324 symmetry group in first oligomer maps to which in the second one. We
00325 again try all possible combinations...
00326 - Limitations:
00327
00328 - Trying all possible mappings is a combinatorial nightmare (factorial).
00329 We throw an exception if too many combinations (e.g. octomer vs
00330 octomer with no usable symmetry)
00331 - The mapping is forced: the "best" mapping will be chosen independently
00332 of how badly they fit in terms of multi-chain-RMSD
00333 - As a result, such a forced mapping can lead to a large range of
00334 resulting QS scores. An extreme example was observed between 1on3.1
00335 and 3u9r.1, where :attr:`global_score` can range from 0.12 to 0.43
00336 for mappings with very similar multi-chain-RMSD.
00337
00338 :getter: Computed on first use (cached)
00339 :type: :class:`dict` with key / value = :class:`str` (chain names, key
00340 for :attr:`ent_to_cm_1`, value for :attr:`ent_to_cm_2`)
00341 :raises: :class:`QSscoreError` if there are too many combinations to check
00342 to find a chain mapping.
00343 """
00344 if self._chain_mapping is None:
00345 self._chain_mapping = _GetChainMapping(self.ent_to_cm_1, self.ent_to_cm_2,
00346 self.symm_1, self.symm_2,
00347 self.chem_mapping)
00348 LogInfo('Mapping found: %s' % str(self._chain_mapping))
00349 return self._chain_mapping
00350
00351 @property
00352 def alignments(self):
00353 """List of successful sequence alignments using :attr:`chain_mapping`.
00354
00355 There will be one alignment for each mapped chain and they are ordered by
00356 their chain names in :attr:`qs_ent_1`.
00357
00358 The sequences of the alignments have views attached into
00359 :attr:`QSscoreEntity.ent` of :attr:`qs_ent_1` and :attr:`qs_ent_2`.
00360
00361 :getter: Computed on first use (cached)
00362 :type: :class:`list` of :class:`~ost.seq.AlignmentHandle`
00363 """
00364 if self._alignments is None:
00365 self._alignments = _GetMappedAlignments(self.qs_ent_1.ent,
00366 self.qs_ent_2.ent,
00367 self.chain_mapping)
00368 return self._alignments
00369
00370 @property
00371 def mapped_residues(self):
00372 """Mapping of shared residues in :attr:`alignments`.
00373
00374 :getter: Computed on first use (cached)
00375 :type: :class:`dict` *mapped_residues[c1][r1] = r2* with:
00376 *c1* = Chain name in first entity (= first sequence in aln),
00377 *r1* = Residue number in first entity,
00378 *r2* = Residue number in second entity
00379 """
00380 if self._mapped_residues is None:
00381 self._mapped_residues = _GetMappedResidues(self.alignments)
00382 return self._mapped_residues
00383
00384 @property
00385 def global_score(self):
00386 """QS-score with penalties.
00387
00388 The range of the score is between 0 (i.e. no interface residues are shared
00389 between biounits) and 1 (i.e. the interfaces are identical).
00390
00391 The global QS-score is computed applying penalties when interface residues
00392 or entire chains are missing (i.e. anything that is not mapped in
00393 :attr:`mapped_residues` / :attr:`chain_mapping`) in one of the biounits.
00394
00395 :getter: Computed on first use (cached)
00396 :type: :class:`float`
00397 """
00398 if self._global_score is None:
00399 self._ComputeScores()
00400 return self._global_score
00401
00402 @property
00403 def best_score(self):
00404 """QS-score without penalties.
00405
00406 Like :attr:`global_score`, but neglecting additional residues or chains in
00407 one of the biounits (i.e. the score is calculated considering only mapped
00408 chains and residues).
00409
00410 :getter: Computed on first use (cached)
00411 :type: :class:`float`
00412 """
00413 if self._best_score is None:
00414 self._ComputeScores()
00415 return self._best_score
00416
00417 @property
00418 def superposition(self):
00419 """Superposition result based on shared CA atoms in :attr:`alignments`.
00420
00421 The superposition can be used to map :attr:`QSscoreEntity.ent` of
00422 :attr:`qs_ent_1` onto the one of :attr:`qs_ent_2`. Use
00423 :func:`ost.geom.Invert` if you need the opposite transformation.
00424
00425 :getter: Computed on first use (cached)
00426 :type: :class:`ost.mol.alg.SuperpositionResult`
00427 """
00428 if self._superposition is None:
00429 self._superposition = _GetQsSuperposition(self.alignments)
00430
00431 sup_rmsd = self._superposition.rmsd
00432 cmp_view = self._superposition.view1
00433 LogInfo('CA RMSD for %s aligned residues on %s chains: %.2f' \
00434 % (cmp_view.residue_count, cmp_view.chain_count, sup_rmsd))
00435 return self._superposition
00436
00437 @property
00438 def lddt_score(self):
00439 """The multi-chain lDDT score.
00440
00441 .. note::
00442
00443 lDDT is not considering over-prediction (i.e. extra chains) and hence is
00444 not symmetric. Here, we consider :attr:`qs_ent_1` as the reference and
00445 :attr:`qs_ent_2` as the model. The alignments from :attr:`alignments` are
00446 used to map residue numbers and chains.
00447
00448 The score is computed with OST's :func:`~ost.mol.alg.LocalDistDiffTest`
00449 function with a single distance threshold of 2 A and an inclusion radius of
00450 8 A. You can use :attr:`lddt_mdl` and :attr:`lddt_ref` to get entities on
00451 which you can call any other lDDT function with any other set of parameters.
00452
00453 :getter: Computed on first use (cached)
00454 :type: :class:`float`
00455 """
00456 if self._lddt_score is None:
00457 self._ComputeLDDT()
00458 return self._lddt_score
00459
00460 @property
00461 def lddt_mdl(self):
00462 """The model entity used for lDDT scoring (:attr:`lddt_score`) and annotated
00463 with local scores.
00464
00465 Local scores are available as residue properties named 'lddt' and on each
00466 atom as a B-factor. Only CA atoms are considered if :attr:`calpha_only` is
00467 True, otherwise this is an all-atom score.
00468
00469 Since, the lDDT computation requires a single chain with mapped residue
00470 numbering, all chains are appended into a single chain X with unique residue
00471 numbers according to the column-index in the alignment. The alignments are
00472 in the same order as they appear in :attr:`alignments`. Additional residues
00473 are appended at the end of the chain with unique residue numbers.
00474
00475 :getter: Computed on first use (cached)
00476 :type: :class:`~ost.mol.EntityHandle`
00477 """
00478 if self._lddt_mdl is None:
00479 self._ComputeLDDT()
00480 return self._lddt_mdl
00481
00482 @property
00483 def lddt_ref(self):
00484 """The reference entity used for lDDT scoring (:attr:`lddt_score`).
00485
00486 This is a single chain X with residue numbers matching ones in
00487 :attr:`lddt_mdl` where aligned and unique numbers for additional residues.
00488
00489 :getter: Computed on first use (cached)
00490 :type: :class:`~ost.mol.EntityHandle`
00491 """
00492 if self._lddt_ref is None:
00493 self._ComputeLDDT()
00494 return self._lddt_ref
00495
00496 @property
00497 def clustalw_bin(self):
00498 """
00499 Full path to ``clustalw`` or ``clustalw2`` executable to use for multiple
00500 sequence alignments (unless :attr:`chain_mapping` is provided manually).
00501
00502 :getter: Located in path on first use (cached)
00503 :type: :class:`str`
00504 """
00505 if self._clustalw_bin is None:
00506 self._clustalw_bin = settings.Locate(('clustalw', 'clustalw2'))
00507 return self._clustalw_bin
00508
00509
00510
00511
00512
00513 def _ComputeAlignedEntities(self):
00514 """Fills cached ent_to_cm_1 and ent_to_cm_2."""
00515
00516 ev1, ev2 = _GetAlignedResidues(self.qs_ent_1, self.qs_ent_2,
00517 self.chem_mapping,
00518 self.max_ca_per_chain_for_cm,
00519 self.clustalw_bin)
00520
00521 self._ent_to_cm_1 = mol.CreateEntityFromView(ev1, True)
00522 self._ent_to_cm_2 = mol.CreateEntityFromView(ev2, True)
00523
00524 self._ent_to_cm_1.SetName(self.qs_ent_1.GetName())
00525 self._ent_to_cm_2.SetName(self.qs_ent_2.GetName())
00526
00527 def _ComputeSymmetry(self):
00528 """Fills cached symm_1 and symm_2."""
00529
00530 self._symm_1, self._symm_2 = \
00531 _FindSymmetry(self.qs_ent_1, self.qs_ent_2, self.ent_to_cm_1,
00532 self.ent_to_cm_2, self.chem_mapping)
00533
00534 if not _AreValidSymmetries(self._symm_1, self._symm_2):
00535 self._symm_1 = [tuple(ch.name for ch in self.ent_to_cm_1.chains)]
00536 self._symm_2 = [tuple(ch.name for ch in self.ent_to_cm_2.chains)]
00537
00538 def _ComputeScores(self):
00539 """Fills cached global_score and best_score."""
00540
00541 if self.calpha_only:
00542 contacts_1 = self.qs_ent_1.contacts_ca
00543 contacts_2 = self.qs_ent_2.contacts_ca
00544 else:
00545 contacts_1 = self.qs_ent_1.contacts
00546 contacts_2 = self.qs_ent_2.contacts
00547
00548 scores = _GetScores(contacts_1, contacts_2, self.mapped_residues,
00549 self.chain_mapping)
00550 self._best_score = scores[0]
00551 self._global_score = scores[1]
00552
00553 LogInfo('QSscore %s, %s: best: %.2f, global: %.2f' \
00554 % (self.qs_ent_1.GetName(), self.qs_ent_2.GetName(),
00555 self._best_score, self._global_score))
00556
00557 def _ComputeLDDT(self):
00558 """Fills cached lddt_score, lddt_mdl and lddt_ref."""
00559 LogInfo('Computing lDDT score')
00560
00561 ref, mdl = self.qs_ent_1.ent, self.qs_ent_2.ent
00562 LogInfo('Reference %s has: %s chains' % (ref.GetName(), ref.chain_count))
00563 LogInfo('Model %s has: %s chains' % (mdl.GetName(), mdl.chain_count))
00564 if mdl.chain_count > ref.chain_count:
00565 LogWarning('MODEL contains more chains than REFERENCE, '
00566 'lDDT is not considering them')
00567
00568 self._lddt_ref, self._lddt_mdl = \
00569 _MergeAlignedChains(self.alignments, ref, mdl, self.calpha_only)
00570
00571 self._lddt_score = _ComputeLDDTScore(self._lddt_ref, self._lddt_mdl)
00572
00573
00574
00575
00576
00577
00578 class QSscoreEntity(object):
00579 """Entity with cached entries for QS scoring.
00580
00581 Any known / precomputed information can be filled into the appropriate
00582 attribute here as long as they are labelled as read/write. Otherwise the
00583 quantities are computed on first access and cached (lazy evaluation). The
00584 heaviest load is expected when computing :attr:`contacts` and
00585 :attr:`contacts_ca`.
00586
00587 :param ent: Entity to be used for QS scoring. A copy of it will be processed.
00588 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
00589
00590 .. attribute:: is_valid
00591
00592 True, if successfully initialized. False, if input structure is monomer or
00593 has less than 2 protein chains with >= 20 residues.
00594
00595 :type: :class:`bool`
00596
00597 .. attribute:: original_name
00598
00599 Name set for *ent* when object was created.
00600
00601 :type: :class:`str`
00602
00603 .. attribute:: ent
00604
00605 Cleaned version of *ent* passed at construction. Hydrogens are removed, the
00606 entity is processed with a :class:`~ost.conop.RuleBasedProcessor` and chains
00607 listed in :attr:`removed_chains` have been removed. The name of this entity
00608 might change during scoring (see :func:`GetName`). Otherwise, this will be
00609 fixed.
00610
00611 :type: :class:`~ost.mol.EntityHandle`
00612
00613 .. attribute:: removed_chains
00614
00615 Chains removed from *ent* passed at construction. These are ligand and water
00616 chains as well as small (< 20 res.) peptides or chains with no amino acids
00617 (determined by chem. type, which is set by rule based processor).
00618
00619 :type: :class:`list` of :class:`str`
00620
00621 .. attribute:: calpha_only
00622
00623 Whether entity is CA-only (i.e. it has 0 CB atoms)
00624
00625 :type: :class:`bool`
00626 """
00627 def __init__(self, ent):
00628
00629 self.original_name = ent.GetName()
00630 ent = mol.CreateEntityFromView(ent.Select('ele!=H and aname!=HN'), True)
00631 if not conop.GetDefaultLib():
00632 raise RuntimeError("QSscore computation requires a compound library!")
00633 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
00634 pr.Process(ent)
00635 self.ent, self.removed_chains, self.calpha_only = _CleanInputEntity(ent)
00636
00637 if self.ent.chain_count == 0:
00638 LogError('Bad input file: ' + ent.GetName() + '. No chains left after '
00639 'removing water, ligands and small peptides.')
00640 self.is_valid = False
00641 elif self.ent.chain_count == 1:
00642 LogError('Structure ' + ent.GetName() + ' is a monomer. '
00643 'QSscore is not defined for monomers.')
00644 self.is_valid = False
00645 else:
00646 self.is_valid = True
00647
00648 self._ca_entity = None
00649 self._ca_chains = None
00650 self._alignments = {}
00651 self._chem_groups = None
00652 self._angles = {}
00653 self._axis = {}
00654 self._contacts = None
00655 self._contacts_ca = None
00656
00657 def GetName(self):
00658 """Wrapper to :func:`~ost.mol.EntityHandle.GetName` of :attr:`ent`.
00659 This is used to uniquely identify the entity while scoring. The name may
00660 therefore change while :attr:`original_name` remains fixed.
00661 """
00662
00663 return self.ent.GetName()
00664
00665 def SetName(self, new_name):
00666 """Wrapper to :func:`~ost.mol.EntityHandle.SetName` of :attr:`ent`.
00667 Use this to change unique identifier while scoring (see :func:`GetName`).
00668 """
00669
00670 self.ent.SetName(new_name)
00671
00672 @property
00673 def ca_entity(self):
00674 """
00675 Reduced representation of :attr:`ent` with only CA atoms.
00676 This guarantees that each included residue has exactly one atom.
00677
00678 :getter: Computed on first use (cached)
00679 :type: :class:`~ost.mol.EntityHandle`
00680 """
00681 if self._ca_entity is None:
00682 self._ca_entity = _GetCAOnlyEntity(self.ent)
00683 return self._ca_entity
00684
00685 @property
00686 def ca_chains(self):
00687 """
00688 Map of chain names in :attr:`ent` to sequences with attached view to CA-only
00689 chains (into :attr:`ca_entity`). Useful for alignments and superpositions.
00690
00691 :getter: Computed on first use (cached)
00692 :type: :class:`dict` (key = :class:`str`,
00693 value = :class:`~ost.seq.SequenceHandle`)
00694 """
00695 if self._ca_chains is None:
00696 self._ca_chains = dict()
00697 ca_entity = self.ca_entity
00698 for ch in ca_entity.chains:
00699 self._ca_chains[ch.name] = seq.SequenceFromChain(ch.name, ch)
00700 return self._ca_chains
00701
00702 def GetAlignment(self, c1, c2):
00703 """Get sequence alignment of chain *c1* with chain *c2*.
00704 Computed on first use based on :attr:`ca_chains` (cached).
00705
00706 :param c1: Chain name for first chain to align.
00707 :type c1: :class:`str`
00708 :param c2: Chain name for second chain to align.
00709 :type c2: :class:`str`
00710 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
00711 """
00712 if (c1,c2) not in self._alignments:
00713 ca_chains = self.ca_chains
00714 self._alignments[(c1,c2)] = _AlignAtomSeqs(ca_chains[c1], ca_chains[c2])
00715 return self._alignments[(c1,c2)]
00716
00717 @property
00718 def chem_groups(self):
00719 """
00720 Intra-complex group of chemically identical (seq. id. > 95%) polypeptide
00721 chains as extracted from :attr:`ca_chains`. First chain in group is the one
00722 with the longest sequence.
00723
00724 :getter: Computed on first use (cached)
00725 :type: :class:`list` of :class:`list` of :class:`str` (chain names)
00726 """
00727 if self._chem_groups is None:
00728 self._chem_groups = _GetChemGroups(self, 95)
00729 LogInfo('Chemically equivalent chain-groups in %s: %s' \
00730 % (self.GetName(), str(self._chem_groups)))
00731 return self._chem_groups
00732
00733 def GetAngles(self, c1, c2):
00734 """Get Euler angles from superposition of chain *c1* with chain *c2*.
00735 Computed on first use based on :attr:`ca_chains` (cached).
00736
00737 :param c1: Chain name for first chain to superpose.
00738 :type c1: :class:`str`
00739 :param c2: Chain name for second chain to superpose.
00740 :type c2: :class:`str`
00741 :return: 3 Euler angles (may contain nan if something fails).
00742 :rtype: :class:`numpy.array`
00743 """
00744 if (c1,c2) not in self._angles:
00745 self._GetSuperposeData(c1, c2)
00746 return self._angles[(c1,c2)]
00747
00748 def GetAxis(self, c1, c2):
00749 """Get axis of symmetry from superposition of chain *c1* with chain *c2*.
00750 Computed on first use based on :attr:`ca_chains` (cached).
00751
00752 :param c1: Chain name for first chain to superpose.
00753 :type c1: :class:`str`
00754 :param c2: Chain name for second chain to superpose.
00755 :type c2: :class:`str`
00756 :return: Rotational axis (may contain nan if something fails).
00757 :rtype: :class:`numpy.array`
00758 """
00759 if (c1,c2) not in self._axis:
00760 self._GetSuperposeData(c1, c2)
00761 return self._axis[(c1,c2)]
00762
00763 @property
00764 def contacts(self):
00765 """
00766 Connectivity dictionary (**read/write**).
00767 As given by :func:`GetContacts` with *calpha_only* = False on :attr:`ent`.
00768
00769 :getter: Computed on first use (cached)
00770 :setter: Uses :func:`FilterContacts` to ensure that we only keep contacts
00771 for chains in the cleaned entity.
00772 :type: See return type of :func:`GetContacts`
00773 """
00774 if self._contacts is None:
00775 self._contacts = GetContacts(self.ent, False)
00776 return self._contacts
00777
00778 @contacts.setter
00779 def contacts(self, new_contacts):
00780 chain_names = set([ch.name for ch in self.ent.chains])
00781 self._contacts = FilterContacts(new_contacts, chain_names)
00782
00783 @property
00784 def contacts_ca(self):
00785 """
00786 CA-only connectivity dictionary (**read/write**).
00787 Like :attr:`contacts` but with *calpha_only* = True in :func:`GetContacts`.
00788 """
00789 if self._contacts_ca is None:
00790 self._contacts_ca = GetContacts(self.ent, True)
00791 return self._contacts_ca
00792
00793 @contacts_ca.setter
00794 def contacts_ca(self, new_contacts):
00795 chain_names = set([ch.name for ch in self.ent.chains])
00796 self._contacts_ca = FilterContacts(new_contacts, chain_names)
00797
00798
00799
00800
00801
00802 def _GetSuperposeData(self, c1, c2):
00803 """Fill _angles and _axis from superposition of CA chains of c1 and c2."""
00804
00805 aln = self.GetAlignment(c1, c2)
00806 if not aln:
00807
00808 self._angles[(c1,c2)] = np.empty(3) * np.nan
00809 self._axis[(c1,c2)] = np.empty(3) * np.nan
00810 return
00811 v1, v2 = seq.ViewsFromAlignment(aln)
00812 if v1.atom_count < 3:
00813
00814 self._angles[(c1,c2)] = np.empty(3) * np.nan
00815 self._axis[(c1,c2)] = np.empty(3) * np.nan
00816 return
00817
00818 sup_res = mol.alg.SuperposeSVD(v1, v2, apply_transform=False)
00819 Rt = sup_res.transformation
00820
00821 a,b,c = _GetAngles(Rt)
00822 self._angles[(c1,c2)] = np.asarray([a,b,c])
00823
00824 R3 = geom.Rotation3(Rt.ExtractRotation())
00825 self._axis[(c1,c2)] = np.asarray(R3.GetRotationAxis().data)
00826
00827
00828
00829
00830
00831 def FilterContacts(contacts, chain_names):
00832 """Filter contacts to contain only contacts for chains in *chain_names*.
00833
00834 :param contacts: Connectivity dictionary as produced by :func:`GetContacts`.
00835 :type contacts: :class:`dict`
00836 :param chain_names: Chain names to keep.
00837 :type chain_names: :class:`list` or (better) :class:`set`
00838 :return: New connectivity dictionary (format as in :func:`GetContacts`)
00839 :rtype: :class:`dict`
00840 """
00841
00842 filtered_contacts = dict()
00843 for c1 in contacts:
00844 if c1 in chain_names:
00845 new_contacts = dict()
00846 for c2 in contacts[c1]:
00847 if c2 in chain_names:
00848 new_contacts[c2] = contacts[c1][c2]
00849
00850 if new_contacts:
00851 filtered_contacts[c1] = new_contacts
00852 return filtered_contacts
00853
00854 def GetContacts(entity, calpha_only, dist_thr=12.0):
00855 """Get inter-chain contacts of a macromolecular entity.
00856
00857 Contacts are pairs of residues within a given distance belonging to different
00858 chains. They are stored once per pair and include the CA/CB-CA/CB distance.
00859
00860 :param entity: An entity to check connectivity for.
00861 :type entity: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
00862 :param calpha_only: If True, we only consider CA-CA distances. Else, we use CB
00863 unless the residue is a GLY.
00864 :type calpha_only: :class:`bool`
00865 :param dist_thr: Maximal CA/CB-CA/CB distance to be considered in contact.
00866 :type dist_thr: :class:`float`
00867 :return: A connectivity dictionary. A pair of residues with chain names
00868 *ch_name1* & *ch_name2* (*ch_name1* < *ch_name2*), residue numbers
00869 *res_num1* & *res_num2* and distance *dist* (<= *dist_thr*) are
00870 stored as *result[ch_name1][ch_name2][res_num1][res_num2]* = *dist*.
00871 :rtype: :class:`dict`
00872 """
00873
00874 if calpha_only:
00875 ev = entity.Select("aname=CA")
00876 else:
00877 ev = entity.Select("(rname=GLY and aname=CA) or aname=CB")
00878 ent = mol.CreateEntityFromView(ev, True)
00879
00880 contacts = dict()
00881 for atom in ent.atoms:
00882 ch_name1 = atom.chain.name
00883 res_num1 = atom.residue.number.num
00884 close_atoms = ent.FindWithin(atom.pos, dist_thr)
00885 for close_atom in close_atoms:
00886 ch_name2 = close_atom.chain.name
00887 if ch_name2 > ch_name1:
00888 res_num2 = close_atom.residue.number.num
00889 dist = geom.Distance(atom.pos, close_atom.pos)
00890
00891 if ch_name1 not in contacts:
00892 contacts[ch_name1] = dict()
00893 if ch_name2 not in contacts[ch_name1]:
00894 contacts[ch_name1][ch_name2] = dict()
00895 if res_num1 not in contacts[ch_name1][ch_name2]:
00896 contacts[ch_name1][ch_name2][res_num1] = dict()
00897 contacts[ch_name1][ch_name2][res_num1][res_num2] = round(dist, 3)
00898
00899 return contacts
00900
00901
00902
00903
00904
00905
00906
00907
00908 def _AlignAtomSeqs(seq_1, seq_2):
00909 """
00910 :type seq_1: :class:`ost.seq.SequenceHandle`
00911 :type seq_2: :class:`ost.seq.SequenceHandle`
00912 :return: Alignment of two sequences using a global aignment. Views attached
00913 to the input sequences will remain attached in the aln.
00914 :rtype: :class:`~ost.seq.AlignmentHandle` or None if it failed.
00915 """
00916
00917
00918 aln = None
00919 alns = seq.alg.GlobalAlign(seq_1, seq_2, seq.alg.BLOSUM62)
00920 if alns: aln = alns[0]
00921 if not aln:
00922 LogWarning('Failed to align %s to %s' % (seq_1.name, seq_2.name))
00923 LogWarning('%s: %s' % (seq_1.name, seq_1.string))
00924 LogWarning('%s: %s' % (seq_2.name, seq_2.string))
00925 return aln
00926
00927
00928
00929 def _CleanInputEntity(ent):
00930 """
00931 :param ent: The OST entity to be cleaned.
00932 :type ent: :class:`EntityHandle` or :class:`EntityView`
00933 :return: A tuple of 3 items: :attr:`QSscoreEntity.ent`,
00934 :attr:`QSscoreEntity.removed_chains`,
00935 :attr:`QSscoreEntity.calpha_only`
00936 """
00937
00938 removed_chains = []
00939 for ch in ent.chains:
00940
00941
00942 if ch.name in ['-', '_'] \
00943 or ch.residue_count < 20 \
00944 or not any(r.chem_type.IsAminoAcid() for r in ch.residues) \
00945 or not (set(r.one_letter_code for r in ch.residues) - {'?', 'X'}):
00946 removed_chains.append(ch.name)
00947
00948
00949 if removed_chains:
00950 chain_set = set()
00951 for ch_name in removed_chains:
00952 if ch_name in ['-', '_', ' ']:
00953 chain_set.add('"%c"' % ch_name)
00954 else:
00955 chain_set.add(ch_name)
00956 view = ent.Select('cname!=%s' % ','.join(chain_set))
00957 ent_new = mol.CreateEntityFromView(view, True)
00958 ent_new.SetName(ent.GetName())
00959 else:
00960 ent_new = ent
00961
00962
00963 calpha_only = False
00964 if ent_new.Select('aname=CB').atom_count == 0:
00965 LogInfo('Structure %s is a CA only structure!' % ent_new.GetName())
00966 calpha_only = True
00967
00968
00969 if removed_chains:
00970 LogInfo('Chains removed from %s: %s' \
00971 % (ent_new.GetName(), ''.join(removed_chains)))
00972 LogInfo('Chains in %s: %s' \
00973 % (ent_new.GetName(), ''.join([c.name for c in ent_new.chains])))
00974 return ent_new, removed_chains, calpha_only
00975
00976 def _GetCAOnlyEntity(ent):
00977 """
00978 :param ent: Entity to process
00979 :type ent: :class:`EntityHandle` or :class:`EntityView`
00980 :return: New entity with only CA and only one atom per residue
00981 (see :attr:`QSscoreEntity.ca_entity`)
00982 """
00983
00984 ca_view = ent.CreateEmptyView()
00985
00986 for res in ent.residues:
00987 ca_atom = res.FindAtom("CA")
00988 if ca_atom.IsValid():
00989 ca_view.AddAtom(ca_atom)
00990
00991 return mol.CreateEntityFromView(ca_view, False)
00992
00993 def _GetChemGroups(qs_ent, seqid_thr=95.):
00994 """
00995 :return: Intra-complex group of chemically identical polypeptide chains
00996 (see :attr:`QSscoreEntity.chem_groups`)
00997
00998 :param qs_ent: Entity to process
00999 :type qs_ent: :class:`QSscoreEntity`
01000 :param seqid_thr: Threshold used to decide when two chains are identical.
01001 95 percent tolerates the few mutations crystallographers
01002 like to do.
01003 :type seqid_thr: :class:`float`
01004 """
01005
01006 ca_chains = qs_ent.ca_chains
01007 chain_names = sorted(ca_chains.keys())
01008
01009
01010
01011 id_seqs = []
01012 for ch_1, ch_2 in itertools.combinations(chain_names, 2):
01013 aln = qs_ent.GetAlignment(ch_1, ch_2)
01014 if aln and seq.alg.SequenceIdentity(aln) > seqid_thr:
01015 id_seqs.append((ch_1, ch_2))
01016
01017 if not id_seqs:
01018 return [[name] for name in chain_names]
01019
01020
01021 groups = []
01022 for ch_1, ch_2 in id_seqs:
01023 found = False
01024 for g in groups:
01025 if ch_1 in g or ch_2 in g:
01026 found = True
01027 g.add(ch_1)
01028 g.add(ch_2)
01029 if not found:
01030 groups.append(set([ch_1, ch_2]))
01031
01032 chem_groups = []
01033 for g in groups:
01034 ranked_g = sorted([(-ca_chains[ch].length, ch) for ch in g])
01035 chem_groups.append([ch for _,ch in ranked_g])
01036
01037 for ch in chain_names:
01038 if not any(ch in g for g in chem_groups):
01039 chem_groups.append([ch])
01040
01041 return chem_groups
01042
01043 def _GetAngles(Rt):
01044 """Computes the Euler angles given a transformation matrix.
01045
01046 :param Rt: Rt operator.
01047 :type Rt: :class:`ost.geom.Mat4`
01048 :return: A :class:`tuple` of angles for each axis (x,y,z)
01049 """
01050 rot = np.asmatrix(Rt.ExtractRotation().data).reshape(3,3)
01051 tx = np.arctan2(rot[2,1], rot[2,2])
01052 if tx < 0:
01053 tx += 2*np.pi
01054 ty = np.arctan2(rot[2,0], np.sqrt(rot[2,1]**2 + rot[2,2]**2))
01055 if ty < 0:
01056 ty += 2*np.pi
01057 tz = np.arctan2(rot[1,0], rot[0,0])
01058 if tz < 0:
01059 tz += 2*np.pi
01060 return tx,ty,tz
01061
01062
01063
01064 def _GetChemGroupsMapping(qs_ent_1, qs_ent_2):
01065 """
01066 :return: Inter-complex mapping of chemical groups
01067 (see :attr:`QSscorer.chem_mapping`)
01068
01069 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01070 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01071 """
01072
01073 chem_groups_1 = qs_ent_1.chem_groups
01074 chem_groups_2 = qs_ent_2.chem_groups
01075 repr_chains_1 = {x[0]: tuple(x) for x in chem_groups_1}
01076 repr_chains_2 = {x[0]: tuple(x) for x in chem_groups_2}
01077
01078
01079
01080 swapped = False
01081 if len(repr_chains_2) < len(repr_chains_1):
01082 repr_chains_1, repr_chains_2 = repr_chains_2, repr_chains_1
01083 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
01084 swapped = True
01085
01086
01087
01088
01089
01090
01091
01092 chain_pairs = []
01093 ca_chains_1 = qs_ent_1.ca_chains
01094 ca_chains_2 = qs_ent_2.ca_chains
01095 for ch_1 in repr_chains_1.keys():
01096 for ch_2 in repr_chains_2.keys():
01097 aln = _AlignAtomSeqs(ca_chains_1[ch_1], ca_chains_2[ch_2])
01098 if aln:
01099 chains_seqid = seq.alg.SequenceIdentity(aln)
01100 LogVerbose('Sequence identity', ch_1, ch_2, 'seqid=%.2f' % chains_seqid)
01101 chain_pairs.append((chains_seqid, ch_1, ch_2))
01102
01103
01104 chain_pairs = sorted(chain_pairs, reverse=True)
01105 chem_mapping = {}
01106 for _, c1, c2 in chain_pairs:
01107 skip = False
01108 for a,b in chem_mapping.iteritems():
01109 if repr_chains_1[c1] == a or repr_chains_2[c2] == b:
01110 skip = True
01111 break
01112 if not skip:
01113 chem_mapping[repr_chains_1[c1]] = repr_chains_2[c2]
01114 if swapped:
01115 chem_mapping = {y: x for x, y in chem_mapping.iteritems()}
01116 qs_ent_1, qs_ent_2 = qs_ent_2, qs_ent_1
01117
01118
01119 mapped_1 = set([i for s in chem_mapping.keys() for i in s])
01120 chains_1 = set([c.name for c in qs_ent_1.ent.chains])
01121 if chains_1 - mapped_1:
01122 LogWarning('Unmapped Chains in %s: %s'
01123 % (qs_ent_1.GetName(), ','.join(list(chains_1 - mapped_1))))
01124
01125 mapped_2 = set([i for s in chem_mapping.values() for i in s])
01126 chains_2 = set([c.name for c in qs_ent_2.ent.chains])
01127 if chains_2 - mapped_2:
01128 LogWarning('Unmapped Chains in %s: %s'
01129 % (qs_ent_2.GetName(), ','.join(list(chains_2 - mapped_2))))
01130
01131
01132 LogInfo('Chemical chain-groups mapping: ' + str(chem_mapping))
01133 if len(mapped_1) < 2 or len(mapped_2) < 2:
01134 raise QSscoreError('Less than 2 chains left in chem_mapping.')
01135 return chem_mapping
01136
01137 def _SelectFew(l, max_elements):
01138 """Return l or copy of l with at most *max_elements* entries."""
01139 n_el = len(l)
01140 if n_el <= max_elements:
01141 return l
01142 else:
01143
01144 d_el = ((n_el - 1) // max_elements) + 1
01145 new_l = list()
01146 for i in range(0, n_el, d_el):
01147 new_l.append(l[i])
01148 return new_l
01149
01150 def _GetAlignedResidues(qs_ent_1, qs_ent_2, chem_mapping, max_ca_per_chain,
01151 clustalw_bin):
01152 """
01153 :return: Tuple of two :class:`~ost.mol.EntityView` objects containing subsets
01154 of *qs_ent_1* and *qs_ent_2*. Two entities are later created from
01155 those views (see :attr:`QSscorer.ent_to_cm_1` and
01156 :attr:`QSscorer.ent_to_cm_2`)
01157
01158 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01159 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01160 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01161 :param max_ca_per_chain: See :attr:`QSscorer.max_ca_per_chain_for_cm`
01162 """
01163
01164 ent_view_1 = qs_ent_1.ca_entity.CreateEmptyView()
01165 ent_view_2 = qs_ent_2.ca_entity.CreateEmptyView()
01166 ca_chains_1 = qs_ent_1.ca_chains
01167 ca_chains_2 = qs_ent_2.ca_chains
01168
01169 for group_1, group_2 in chem_mapping.iteritems():
01170
01171 seq_list = seq.CreateSequenceList()
01172
01173 seq_to_empty_view = dict()
01174 for ch in group_1:
01175 sequence = ca_chains_1[ch].Copy()
01176 sequence.name = qs_ent_1.GetName() + '.' + ch
01177 seq_to_empty_view[sequence.name] = ent_view_1
01178 seq_list.AddSequence(sequence)
01179 for ch in group_2:
01180 sequence = ca_chains_2[ch].Copy()
01181 sequence.name = qs_ent_2.GetName() + '.' + ch
01182 seq_to_empty_view[sequence.name] = ent_view_2
01183 seq_list.AddSequence(sequence)
01184 alnc = ClustalW(seq_list, clustalw=clustalw_bin)
01185
01186
01187 residue_lists = list()
01188 sequence_count = alnc.sequence_count
01189 for col in alnc:
01190 residues = [col.GetResidue(ir) for ir in range(sequence_count)]
01191 if all([r.IsValid() for r in residues]):
01192 residue_lists.append(residues)
01193
01194 if len(residue_lists) < 5:
01195 raise QSscoreError('Not enough aligned residues.')
01196 elif max_ca_per_chain > 0:
01197 residue_lists = _SelectFew(residue_lists, max_ca_per_chain)
01198
01199 res_views = [seq_to_empty_view[alnc.sequences[ir].name] \
01200 for ir in range(sequence_count)]
01201
01202 for res_list in residue_lists:
01203 for res, view in zip(res_list, res_views):
01204 view.AddResidue(res, mol.ViewAddFlag.INCLUDE_ATOMS)
01205
01206 return ent_view_1, ent_view_2
01207
01208
01209 def _FindSymmetry(qs_ent_1, qs_ent_2, ent_to_cm_1, ent_to_cm_2, chem_mapping):
01210 """
01211 :return: A pair of comparable symmetry groups (for :attr:`QSscorer.symm_1`
01212 and :attr:`QSscorer.symm_2`) between the two structures.
01213 Empty lists if no symmetry identified.
01214
01215 :param qs_ent_1: See :attr:`QSscorer.qs_ent_1`
01216 :param qs_ent_2: See :attr:`QSscorer.qs_ent_2`
01217 :param ent_to_cm_1: See :attr:`QSscorer.ent_to_cm_1`
01218 :param ent_to_cm_2: See :attr:`QSscorer.ent_to_cm_2`
01219 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01220 """
01221 LogInfo('Identifying Symmetry Groups...')
01222
01223
01224 symm_subg_1 = _GetSymmetrySubgroups(qs_ent_1, ent_to_cm_1,
01225 chem_mapping.keys())
01226 symm_subg_2 = _GetSymmetrySubgroups(qs_ent_2, ent_to_cm_2,
01227 chem_mapping.values())
01228
01229
01230 LogInfo('Selecting Symmetry Groups...')
01231
01232
01233
01234
01235 best_symm = []
01236 for symm_1, symm_2 in itertools.product(symm_subg_1, symm_subg_2):
01237 s1 = symm_1[0]
01238 s2 = symm_2[0]
01239 if len(s1) != len(s2):
01240 LogDebug('Discarded', str(symm_1), str(symm_2),
01241 ': different size of symmetry groups')
01242 continue
01243 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
01244 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
01245 LogDebug('OK', str(symm_1), str(symm_2), ': %s mappings' % nr_mapp)
01246 best_symm.append((nr_mapp, symm_1, symm_2))
01247
01248 for _, symm_1, symm_2 in sorted(best_symm):
01249 s1 = symm_1[0]
01250 s2 = symm_2[0]
01251 group_1 = ent_to_cm_1.Select('cname=%s' % ','.join(s1))
01252 group_2 = ent_to_cm_2.Select('cname=%s' % ','.join(s2))
01253
01254
01255
01256 closed_symm = _CheckClosedSymmetry(group_1, group_2, symm_1, symm_2,
01257 chem_mapping, 6, 0.8, find_best=False)
01258
01259 if closed_symm:
01260 return symm_1, symm_2
01261
01262
01263 LogInfo('No coherent symmetry identified between structures')
01264 return [], []
01265
01266
01267 def _GetChainMapping(ent_1, ent_2, symm_1, symm_2, chem_mapping):
01268 """
01269 :return: Mapping from *ent_1* to *ent_2* (see :attr:`QSscorer.chain_mapping`)
01270
01271 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
01272 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
01273 :param symm_1: See :attr:`QSscorer.symm_1`
01274 :param symm_2: See :attr:`QSscorer.symm_2`
01275 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01276 """
01277 LogInfo('Symmetry-groups used in %s: %s' % (ent_1.GetName(), str(symm_1)))
01278 LogInfo('Symmetry-groups used in %s: %s' % (ent_2.GetName(), str(symm_2)))
01279
01280
01281 thresholds = [( 'strict', 4, 0.8),
01282 ( 'tolerant', 6, 0.4),
01283 ('permissive', 8, 0.2)]
01284 for scheme, sup_thr, sup_fract in thresholds:
01285
01286
01287
01288 mapping = _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2,
01289 chem_mapping, sup_thr, sup_fract)
01290 if mapping:
01291 LogInfo('Closed Symmetry with %s parameters' % scheme)
01292 if scheme == 'permissive':
01293 LogWarning('Permissive thresholds used for overlap based mapping ' + \
01294 'detection: check mapping manually: %s' % mapping)
01295 return mapping
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309 count = _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping)
01310 LogInfo('Intra Symmetry-group mappings to check: %s' \
01311 % count['intra']['mappings'])
01312 LogInfo('Inter Symmetry-group mappings to check: %s' \
01313 % count['inter']['mappings'])
01314 nr_mapp = count['intra']['mappings'] + count['inter']['mappings']
01315 if nr_mapp > 100000:
01316 raise QSscoreError('Too many possible mappings: %s' % nr_mapp)
01317
01318
01319 cached_rmsd = _CachedRMSD(ent_1, ent_2)
01320
01321
01322 check = 0
01323 intra_mappings = []
01324
01325 ref_symm_1 = symm_1[0]
01326 ref_symm_2 = symm_2[0]
01327 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01328
01329 for g1, g2 in asu_chem_mapping.iteritems():
01330
01331 for c1, c2 in itertools.product(g1, g2):
01332
01333 LogVerbose('Superposing chains: %s, %s' % (c1, c2))
01334 res = cached_rmsd.GetSuperposition(c1, c2)
01335
01336 cur_mappings = []
01337 for mapping in _ListPossibleMappings(c1, c2, asu_chem_mapping):
01338 check += 1
01339 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
01340 cur_mappings.append((chains_rmsd, mapping))
01341 LogVerbose(str(mapping), str(chains_rmsd))
01342 best_rmsd, best_mapp = min(cur_mappings)
01343 intra_mappings.append((best_rmsd, c1, c2, best_mapp))
01344
01345 _, intra_asu_c1, intra_asu_c2, intra_asu_mapp = min(intra_mappings)
01346
01347
01348 if len(symm_1) == 1 or len(symm_2) == 1:
01349 mapping = intra_asu_mapp
01350 else:
01351
01352
01353
01354 index_asu_c1 = ref_symm_1.index(intra_asu_c1)
01355 index_asu_c2 = ref_symm_2.index(intra_asu_c2)
01356 index_mapp = {ref_symm_1.index(s1): ref_symm_2.index(s2) \
01357 for s1, s2 in intra_asu_mapp.iteritems()}
01358 LogInfo('Intra symmetry-group mapping: %s' % str(intra_asu_mapp))
01359
01360
01361
01362 if len(symm_1) < len(symm_2):
01363 symm_combinations = list(itertools.product(symm_1, [symm_2[0]]))
01364 else:
01365 symm_combinations = list(itertools.product([symm_1[0]], symm_2))
01366
01367 full_asu_mapp = {tuple(symm_1): tuple(symm_2)}
01368 full_mappings = []
01369 for s1, s2 in symm_combinations:
01370
01371 LogVerbose('Superposing symmetry-group: %s, %s in %s, %s' \
01372 % (s1[index_asu_c1], s2[index_asu_c2], s1, s2))
01373 res = cached_rmsd.GetSuperposition(s1[index_asu_c1], s2[index_asu_c2])
01374
01375 for asu_mapp in _ListPossibleMappings(s1, s2, full_asu_mapp):
01376 check += 1
01377
01378 mapping = {}
01379 for a, b in asu_mapp.iteritems():
01380 for id_a, id_b in index_mapp.iteritems():
01381 mapping[a[id_a]] = b[id_b]
01382 chains_rmsd = cached_rmsd.GetMappedRMSD(mapping, res.transformation)
01383 full_mappings.append((chains_rmsd, mapping))
01384 LogVerbose(str(mapping), str(chains_rmsd))
01385
01386 if check != nr_mapp:
01387 LogWarning('Mapping number estimation was wrong')
01388
01389
01390 mapping = min(full_mappings)[1]
01391
01392 LogWarning('Extensive search used for mapping detection (fallback). This ' + \
01393 'approach has known limitations. Check mapping manually: %s' \
01394 % mapping)
01395 return mapping
01396
01397
01398 def _GetSymmetrySubgroups(qs_ent, ent, chem_groups):
01399 """
01400 Identify the symmetry (either cyclic C or dihedral D) of the protein and find
01401 all possible symmetry subgroups. This is done testing all combination of chain
01402 superposition and clustering them by the angles (D) or the axis (C) of the Rt
01403 operator.
01404
01405 Groups of superposition which can fully reconstruct the structure are possible
01406 symmetry subgroups.
01407
01408 :param qs_ent: Entity with cached angles and axis.
01409 :type qs_ent: :class:`QSscoreEntity`
01410 :param ent: Entity from which to extract chains (perfect alignment assumed
01411 for superposition as in :attr:`QSscorer.ent_to_cm_1`).
01412 :type ent: :class:`EntityHandle` or :class:`EntityView`
01413 :param chem_groups: List of tuples/lists of chain names in *ent*. Each list
01414 contains all chains belonging to a chem. group (could be
01415 from :attr:`QSscoreEntity.chem_groups` or from "keys()"
01416 or "values()" of :attr:`QSscorer.chem_mapping`)
01417
01418 :return: A list of possible symmetry subgroups (each in same format as
01419 :attr:`QSscorer.symm_1`). If no symmetry is found, we return a list
01420 with a single symmetry subgroup with a single group with all chains.
01421 """
01422
01423 angles = {}
01424 axis = {}
01425 for cnames in chem_groups:
01426 for c1, c2 in itertools.combinations(cnames, 2):
01427 cur_angles = qs_ent.GetAngles(c1, c2)
01428 if not np.any(np.isnan(cur_angles)):
01429 angles[(c1,c2)] = cur_angles
01430 cur_axis = qs_ent.GetAxis(c1, c2)
01431 if not np.any(np.isnan(cur_axis)):
01432 axis[(c1,c2)] = cur_axis
01433
01434
01435 symm_groups = []
01436 LogVerbose('Possible symmetry-groups for: %s' % ent.GetName())
01437 for angle_thr in np.arange(0.1, 1, 0.1):
01438 d_symm_groups = _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr)
01439 if d_symm_groups:
01440 for group in d_symm_groups:
01441 if group not in symm_groups:
01442 symm_groups.append(group)
01443 LogVerbose('Dihedral: %s' % group)
01444 LogInfo('Symmetry threshold %.1f used for angles of %s' \
01445 % (angle_thr, ent.GetName()))
01446 break
01447
01448
01449 for axis_thr in np.arange(0.1, 1, 0.1):
01450 c_symm_groups = _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr)
01451 if c_symm_groups:
01452 for group in c_symm_groups:
01453 if group not in symm_groups:
01454 symm_groups.append(group)
01455 LogVerbose('Cyclic: %s' % group)
01456 LogInfo('Symmetry threshold %.1f used for axis of %s' \
01457 % (axis_thr, ent.GetName()))
01458 break
01459
01460
01461 if not symm_groups:
01462 LogInfo('No symmetry-group detected in %s' % ent.GetName())
01463 symm_groups = [[tuple([c for g in chem_groups for c in g])]]
01464
01465 return symm_groups
01466
01467 def _GetDihedralSubgroups(ent, chem_groups, angles, angle_thr):
01468 """
01469 :return: Dihedral subgroups for :func:`_GetSymmetrySubgroups`
01470 (same return type as there). Empty list if fail.
01471
01472 :param ent: See :func:`_GetSymmetrySubgroups`
01473 :param chem_groups: See :func:`_GetSymmetrySubgroups`
01474 :param angles: :class:`dict` (key = chain-pair-tuple, value = Euler angles)
01475 :param angle_thr: Euler angles distance threshold for clustering (float).
01476 """
01477
01478 if len(angles) == 0:
01479
01480 return []
01481 else:
01482 same_angles = _ClusterData(angles, angle_thr, _AngleArrayDistance)
01483
01484
01485 symm_groups = []
01486 for clst in same_angles.values():
01487 group = clst.keys()
01488 if _ValidChainGroup(group, ent):
01489 if len(chem_groups) > 1:
01490
01491 symm_groups.append(zip(*group))
01492 else:
01493
01494 symm_groups.append(group)
01495 symm_groups.append(zip(*group))
01496 return symm_groups
01497
01498 def _GetCyclicSubgroups(ent, chem_groups, axis, axis_thr):
01499 """
01500 :return: Cyclic subgroups for :func:`_GetSymmetrySubgroups`
01501 (same return type as there). Empty list if fail.
01502
01503 :param ent: See :func:`_GetSymmetrySubgroups`
01504 :param chem_groups: See :func:`_GetSymmetrySubgroups`
01505 :param angles: :class:`dict` (key = chain-pair-tuple, value = rotation axis)
01506 :param angle_thr: Axis distance threshold for clustering (float).
01507 """
01508
01509 if len(axis) == 0:
01510
01511 return []
01512 else:
01513 same_axis = _ClusterData(axis, axis_thr, _AxisDistance)
01514
01515
01516 symm_groups = []
01517 for clst in same_axis.values():
01518 all_chain = [item for sublist in clst.keys() for item in sublist]
01519 if len(set(all_chain)) == ent.chain_count:
01520
01521 if len(chem_groups) > 1:
01522 ref_chem_group = chem_groups[0]
01523
01524 cyclic_group_closest = []
01525 cyclic_group_iface = []
01526 for c1 in ref_chem_group:
01527 group_closest = [c1]
01528 group_iface = [c1]
01529 for chains in chem_groups[1:]:
01530
01531 closest = _GetClosestChain(ent, c1, chains)
01532
01533 closest_iface = _GetClosestChainInterface(ent, c1, chains)
01534 group_closest.append(closest)
01535 group_iface.append(closest_iface)
01536 cyclic_group_closest.append(tuple(group_closest))
01537 cyclic_group_iface.append(tuple(group_iface))
01538 if _ValidChainGroup(cyclic_group_closest, ent):
01539 symm_groups.append(cyclic_group_closest)
01540 if _ValidChainGroup(cyclic_group_iface, ent):
01541 symm_groups.append(cyclic_group_iface)
01542
01543 else:
01544 symm_groups.append(chem_groups)
01545 return symm_groups
01546
01547 def _ClusterData(data, thr, metric):
01548 """Wrapper for fclusterdata to get dict of clusters.
01549
01550 :param data: :class:`dict` (keys for ID, values used for clustering)
01551 :return: :class:`dict` {cluster_idx: {data-key: data-value}}
01552 """
01553
01554 if len(data) == 1:
01555 return {0: {data.keys()[0]: data.values()[0]} }
01556
01557 cluster_indices = fclusterdata(np.asarray(data.values()), thr,
01558 method='complete', criterion='distance',
01559 metric=metric)
01560
01561 res = {}
01562 for cluster_idx, data_key in zip(cluster_indices, data.keys()):
01563 if not cluster_idx in res:
01564 res[cluster_idx] = {}
01565 res[cluster_idx][data_key] = data[data_key]
01566 return res
01567
01568 def _AngleArrayDistance(u, v):
01569 """
01570 :return: Average angular distance of two arrays of angles.
01571 :param u: Euler angles.
01572 :param v: Euler angles.
01573 """
01574 dist = []
01575 for a,b in zip(u,v):
01576 delta = abs(a-b)
01577 if delta > np.pi:
01578 delta = abs(2*np.pi - delta)
01579 dist.append(delta)
01580 return np.mean(dist)
01581
01582 def _AxisDistance(u, v):
01583 """
01584 :return: Euclidean distance between two rotation axes. Axes can point in
01585 either direction so we ensure to use the closer one.
01586 :param u: Rotation axis.
01587 :param v: Rotation axis.
01588 """
01589
01590 u = np.asarray(u)
01591 v = np.asarray(v)
01592
01593 dv1 = u - v
01594 dv2 = u + v
01595 d1 = np.dot(dv1, dv1)
01596 d2 = np.dot(dv2, dv2)
01597 return np.sqrt(min(d1, d2))
01598
01599 def _GetClosestChain(ent, ref_chain, chains):
01600 """
01601 :return: Chain closest to *ref_chain* based on center of atoms distance.
01602 :rtype: :class:`str`
01603 :param ent: See :func:`_GetSymmetrySubgroups`
01604 :param ref_chain: We look for chains closest to this one
01605 :type ref_chain: :class:`str`
01606 :param chains: We only consider these chains
01607 :type chains: :class:`list` of :class:`str`
01608 """
01609 contacts = []
01610 ref_center = ent.FindChain(ref_chain).center_of_atoms
01611 for ch in chains:
01612 center = ent.FindChain(ch).center_of_atoms
01613 contacts.append((geom.Distance(ref_center, center), ch))
01614 closest_chain = min(contacts)[1]
01615 return closest_chain
01616
01617 def _GetClosestChainInterface(ent, ref_chain, chains):
01618 """
01619 :return: Chain with biggest interface (within 10 A) to *ref_chain*.
01620 :rtype: :class:`str`
01621 :param ent: See :func:`_GetSymmetrySubgroups`
01622 :param ref_chain: We look for chains closest to this one
01623 :type ref_chain: :class:`str`
01624 :param chains: We only consider these chains
01625 :type chains: :class:`list` of :class:`str`
01626 """
01627
01628
01629 closest = []
01630 for ch in chains:
01631 iface_view = ent.Select('cname=%s and 10 <> [cname=%s]' % (ref_chain, ch))
01632 nr_res = iface_view.residue_count
01633 closest.append((nr_res, ch))
01634 closest_chain = max(closest)[1]
01635 return closest_chain
01636
01637 def _ValidChainGroup(group, ent):
01638 """
01639 :return: True, if *group* has unique chain names and as many chains as *ent*
01640 :rtype: :class:`bool`
01641 :param group: Symmetry groups to check
01642 :type group: Same as :attr:`QSscorer.symm_1`
01643 :param ent: See :func:`_GetSymmetrySubgroups`
01644 """
01645 all_chain = [item for sublist in group for item in sublist]
01646 if len(all_chain) == len(set(all_chain)) and\
01647 len(all_chain) == ent.chain_count:
01648 return True
01649 return False
01650
01651 def _LimitChemMapping(chem_mapping, limit_1, limit_2):
01652 """
01653 :return: Chem. mapping containing only chains in *limit_1* and *limit_2*
01654 :rtype: Same as :attr:`QSscorer.chem_mapping`
01655 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01656 :param limit_1: Limits chain names in chem_mapping.keys()
01657 :type limit_1: List/tuple of strings
01658 :param limit_2: Limits chain names in chem_mapping.values()
01659 :type limit_2: List/tuple of strings
01660 """
01661
01662 limit_1_set = set(limit_1)
01663 limit_2_set = set(limit_2)
01664 asu_chem_mapping = dict()
01665 for key, value in chem_mapping.iteritems():
01666 new_key = tuple(set(key) & limit_1_set)
01667 if new_key:
01668 asu_chem_mapping[new_key] = tuple(set(value) & limit_2_set)
01669 return asu_chem_mapping
01670
01671
01672 def _CountSuperpositionsAndMappings(symm_1, symm_2, chem_mapping):
01673 """
01674 :return: Dictionary of number of mappings and superpositions to be performed.
01675 Returned as *result[X][Y] = number* with X = "intra" or "inter" and
01676 Y = "mappings" or "superpositions". The idea is that for each
01677 pairwise superposition we check all possible mappings.
01678 We can check the combinations within (intra) a symmetry group and
01679 once established, we check the combinations between different (inter)
01680 symmetry groups.
01681 :param symm_1: See :attr:`QSscorer.symm_1`
01682 :param symm_2: See :attr:`QSscorer.symm_2`
01683 :param chem_mapping: See :attr:`QSscorer.chem_mapping`
01684 """
01685
01686 c = {}
01687 c['intra'] = {}
01688 c['inter'] = {}
01689 c['intra']['mappings'] = 0
01690 c['intra']['superpositions'] = 0
01691 c['inter']['mappings'] = 0
01692 c['inter']['superpositions'] = 0
01693
01694 ref_symm_1 = symm_1[0]
01695 ref_symm_2 = symm_2[0]
01696 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01697 for g1, g2 in asu_chem_mapping.iteritems():
01698 chain_superpositions = len(g1) * len(g2)
01699 c['intra']['superpositions'] += chain_superpositions
01700 map_per_sup = _PermutationOrCombinations(g1[0], g2[0], asu_chem_mapping)
01701 c['intra']['mappings'] += chain_superpositions * map_per_sup
01702 if len(symm_1) == 1 or len(symm_2) == 1:
01703 return c
01704
01705 asu_superposition = min(len(symm_1), len(symm_2))
01706 c['inter']['superpositions'] = asu_superposition
01707 asu = {tuple(symm_1): tuple(symm_2)}
01708 map_per_sup = _PermutationOrCombinations(ref_symm_1, ref_symm_2, asu)
01709 c['inter']['mappings'] = asu_superposition * map_per_sup
01710 return c
01711
01712 def _PermutationOrCombinations(sup1, sup2, chem_mapping):
01713 """Should match len(_ListPossibleMappings(sup1, sup2, chem_mapping))."""
01714
01715 chem_map = {}
01716 for a,b in chem_mapping.iteritems():
01717 new_a = tuple([x for x in a if x != sup1])
01718 new_b = tuple([x for x in b if x != sup2])
01719 chem_map[new_a] = new_b
01720 mapp_nr = 1.0
01721 for a, b in chem_map.iteritems():
01722 if len(a) == len(b):
01723 mapp_nr *= factorial(len(a))
01724 elif len(a) < len(b):
01725 mapp_nr *= binom(len(b), len(a))
01726 elif len(a) > len(b):
01727 mapp_nr *= binom(len(a), len(b))
01728 return int(mapp_nr)
01729
01730 def _ListPossibleMappings(sup1, sup2, chem_mapping):
01731 """
01732 Return a flat list of all possible mappings given *chem_mapping* and keeping
01733 mapping of *sup1* and *sup2* fixed. For instance if elements are chain names
01734 this is all the mappings to check for a given superposition.
01735
01736 Elements in first complex are defined by *chem_mapping.keys()* (list of list
01737 of elements) and elements in second complex by *chem_mapping.values()*. If
01738 complexes don't have same number of elements, we map only elements for the one
01739 with less. Also mapping is only between elements of mapped groups according to
01740 *chem_mapping*.
01741
01742 :rtype: :class:`list` of :class:`dict` (key = element in chem_mapping-key,
01743 value = element in chem_mapping-value)
01744 :param sup1: Element for which mapping is fixed.
01745 :type sup1: Like element in chem_mapping-key
01746 :param sup2: Element for which mapping is fixed.
01747 :type sup2: Like element in chem_mapping-value
01748 :param chem_mapping: Defines mapping between groups of elements (e.g. result
01749 of :func:`_LimitChemMapping`).
01750 :type chem_mapping: :class:`dict` with key / value = :class:`tuple`
01751
01752 :raises: :class:`QSscoreError` if reference complex (first one or one with
01753 less elements) has more elements for any given mapped group.
01754 """
01755
01756 chain1 = [i for s in chem_mapping.keys() for i in s]
01757 chain2 = [i for s in chem_mapping.values() for i in s]
01758 swap = False
01759 if len(chain1) > len(chain2):
01760 swap = True
01761
01762 chem_map = {}
01763 for a, b in chem_mapping.iteritems():
01764 new_a = tuple([x for x in a if x != sup1])
01765 new_b = tuple([x for x in b if x != sup2])
01766 if swap:
01767 chem_map[new_b] = new_a
01768 else:
01769 chem_map[new_a] = new_b
01770
01771
01772 chem_perm = []
01773 chem_ref = []
01774 for a, b in chem_map.iteritems():
01775 if len(a) == len(b):
01776 chem_perm.append(list(itertools.permutations(b)))
01777 chem_ref.append(a)
01778 elif len(a) < len(b):
01779 chem_perm.append(list(itertools.combinations(b, len(a))))
01780 chem_ref.append(a)
01781 else:
01782 raise QSscoreError('Impossible to define reference group: %s' \
01783 % str(chem_map))
01784
01785 mappings = []
01786 flat_ref = [i for s in chem_ref for i in s]
01787 for perm in itertools.product(*chem_perm):
01788 flat_perm = [i for s in perm for i in s]
01789 d = {c1: c2 for c1, c2 in zip(flat_ref, flat_perm)}
01790 if swap:
01791 d = {v: k for k, v in d.items()}
01792 d.update({sup1: sup2})
01793 mappings.append(d)
01794 return mappings
01795
01796
01797 def _CheckClosedSymmetry(ent_1, ent_2, symm_1, symm_2, chem_mapping,
01798 sup_thr=4, sup_fract=0.8, find_best=True):
01799 """
01800 Quick check if we can superpose two chains and get a mapping for all other
01801 chains using the same transformation. The mapping is defined by sufficient
01802 overlap of the transformed chain of *ent_1* onto another chain in *ent_2*.
01803
01804 :param ent_1: Entity to map to *ent_2* (perfect alignment assumed between
01805 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_1`).
01806 Views are ok but only to select full chains!
01807 :param ent_2: Entity to map to (perfect alignment assumed between
01808 chains of same chem. group as in :attr:`QSscorer.ent_to_cm_2`).
01809 Views are ok but only to select full chains!
01810 :param symm_1: Symmetry groups to use. We only superpose chains within
01811 reference symmetry group of *symm_1* and *symm_2*.
01812 See :attr:`QSscorer.symm_1`
01813 :param symm_2: See :attr:`QSscorer.symm_2`
01814 :param chem_mapping: See :attr:`QSscorer.chem_mapping`.
01815 All chains in *ent_1* / *ent_2* must be listed here!
01816 :param sup_thr: Distance around transformed chain in *ent_1* to check for
01817 overlap.
01818 :type sup_thr: :class:`float`
01819 :param sup_fract: Fraction of atoms in chain of *ent_2* that must be
01820 overlapped for overlap to be sufficient.
01821 :type sup_fract: :class:`float`
01822 :param find_best: If True, we look for best mapping according to
01823 :func:`_ChainRMSD`. Otherwise, we return first suitable
01824 mapping.
01825 :type find_best: :class:`bool`
01826
01827 :return: Mapping from *ent_1* to *ent_2* or None if none found. Mapping, if
01828 found, is as in :attr:`QSscorer.chain_mapping`.
01829 :rtype: :class:`dict` (:class:`str` / :class:`str`)
01830 """
01831
01832 ref_symm_1 = symm_1[0]
01833 ref_symm_2 = symm_2[0]
01834 asu_chem_mapping = _LimitChemMapping(chem_mapping, ref_symm_1, ref_symm_2)
01835
01836 rmsd_mappings = []
01837 for g1, g2 in asu_chem_mapping.iteritems():
01838
01839
01840
01841 for c1, c2 in itertools.product(g1, g2):
01842
01843 chain_1 = ent_1.Select('cname=%s' % c1)
01844 chain_2 = ent_2.Select('cname=%s' % c2)
01845 res = mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
01846
01847 mapping = _GetSuperpositionMapping(ent_1, ent_2, chem_mapping,
01848 res.transformation, sup_thr,
01849 sup_fract)
01850 if not mapping:
01851 continue
01852
01853 if not find_best:
01854 return mapping
01855 else:
01856
01857 rmsd = _GetMappedRMSD(ent_1, ent_2, mapping, res.transformation)
01858 rmsd_mappings.append((rmsd, mapping))
01859
01860 if rmsd_mappings:
01861 return min(rmsd_mappings)[1]
01862 else:
01863 return None
01864
01865 def _GetSuperpositionMapping(ent_1, ent_2, chem_mapping, transformation,
01866 sup_thr, sup_fract):
01867 """
01868 :return: Dict with chain mapping from *ent_1* to *ent_2* or None if failed
01869 (see :func:`_CheckClosedSymmetry`).
01870 :param ent_1: See :func:`_CheckClosedSymmetry`
01871 :param ent_2: See :func:`_CheckClosedSymmetry`
01872 :param chem_mapping: See :func:`_CheckClosedSymmetry`
01873 :param transformation: Superposition transformation to be applied to *ent_1*.
01874 :param sup_thr: See :func:`_CheckClosedSymmetry`
01875 :param sup_fract: See :func:`_CheckClosedSymmetry`
01876 """
01877
01878
01879
01880
01881
01882 if ent_1.chain_count > ent_2.chain_count:
01883 swap = True
01884 ent_1, ent_2 = ent_2, ent_1
01885 transformation = geom.Invert(transformation)
01886 chem_pairs = zip(chem_mapping.values(), chem_mapping.keys())
01887 else:
01888 swap = False
01889 chem_pairs = chem_mapping.iteritems()
01890
01891 if ent_1.chain_count == 0:
01892 return None
01893
01894 chem_partners = dict()
01895 for cg1, cg2 in chem_pairs:
01896 for ch in cg1:
01897 chem_partners[ch] = set(cg2)
01898
01899 mapping = dict()
01900 mapped_chains = set()
01901 for ch_1 in ent_1.chains:
01902
01903 ch_atoms = {ch_2.name: set() for ch_2 in ent_2.chains}
01904 for a_1 in ch_1.handle.atoms:
01905 transformed_pos = geom.Vec3(transformation * geom.Vec4(a_1.pos))
01906 a_within = ent_2.FindWithin(transformed_pos, sup_thr)
01907 for a_2 in a_within:
01908 ch_atoms[a_2.chain.name].add(a_2.hash_code)
01909
01910 max_num, max_name = max((len(atoms), name)
01911 for name, atoms in ch_atoms.iteritems())
01912
01913 ch_2 = ent_2.FindChain(max_name)
01914 if max_num == 0 or max_num / float(ch_2.handle.atom_count) < sup_fract:
01915 return None
01916
01917 ch_1_name = ch_1.name
01918 if ch_1_name not in chem_partners:
01919 raise RuntimeError("Chem. mapping doesn't contain all chains!")
01920 if max_name not in chem_partners[ch_1_name]:
01921 return None
01922
01923 if max_name in mapped_chains:
01924 return None
01925
01926 mapping[ch_1_name] = max_name
01927 mapped_chains.add(max_name)
01928
01929 if swap:
01930 mapping = {v: k for k, v in mapping.iteritems()}
01931 return mapping
01932
01933 def _GetMappedRMSD(ent_1, ent_2, chain_mapping, transformation):
01934 """
01935 :return: RMSD between complexes considering chain mapping.
01936 :param ent_1: Entity mapped to *ent_2* (perfect alignment assumed between
01937 mapped chains as in :attr:`QSscorer.ent_to_cm_1`).
01938 :param ent_2: Entity which was mapped to (perfect alignment assumed between
01939 mapped chains as in :attr:`QSscorer.ent_to_cm_2`).
01940 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
01941 :param transformation: Superposition transformation to be applied to *ent_1*.
01942 """
01943
01944 rmsds = []
01945 atoms = []
01946 for c1, c2 in chain_mapping.iteritems():
01947
01948 chain_1 = ent_1.Select('cname=%s' % c1)
01949 chain_2 = ent_2.Select('cname=%s' % c2)
01950 atom_count = chain_1.atom_count
01951 if atom_count != chain_2.atom_count:
01952 raise RuntimeError('Chains in _GetMappedRMSD must be perfectly aligned!')
01953
01954 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
01955
01956 rmsds.append(rmsd)
01957 atoms.append(float(atom_count))
01958
01959 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
01960 return rmsd
01961
01962 class _CachedRMSD:
01963 """Helper object for repetitive RMSD calculations.
01964 Meant to speed up :func:`_GetChainMapping` but could also be used to replace
01965 :func:`_GetMappedRMSD` in :func:`_CheckClosedSymmetry`.
01966
01967 :param ent_1: See :attr:`QSscorer.ent_to_cm_1`
01968 :param ent_2: See :attr:`QSscorer.ent_to_cm_2`
01969 """
01970 def __init__(self, ent_1, ent_2):
01971
01972 self.ent_1 = ent_1
01973 self.ent_2 = ent_2
01974 self._chain_views_1 = dict()
01975 self._chain_views_2 = dict()
01976 self._pair_rmsd = dict()
01977
01978 def GetChainView1(self, cname):
01979 """Get cached view on chain *cname* for :attr:`ent_1`."""
01980 if cname not in self._chain_views_1:
01981 self._chain_views_1[cname] = self.ent_1.Select('cname=%s' % cname)
01982 return self._chain_views_1[cname]
01983
01984 def GetChainView2(self, cname):
01985 """Get cached view on chain *cname* for :attr:`ent_2`."""
01986 if cname not in self._chain_views_2:
01987 self._chain_views_2[cname] = self.ent_2.Select('cname=%s' % cname)
01988 return self._chain_views_2[cname]
01989
01990 def GetSuperposition(self, c1, c2):
01991 """Get superposition result (no change in entities!) for *c1* to *c2*.
01992 This invalidates cached RMSD results used in :func:`GetMappedRMSD`.
01993
01994 :param c1: chain name for :attr:`ent_1`.
01995 :param c2: chain name for :attr:`ent_2`.
01996 """
01997
01998 self._pair_rmsd = dict()
01999
02000 chain_1 = self.GetChainView1(c1)
02001 chain_2 = self.GetChainView2(c2)
02002 if chain_1.atom_count != chain_2.atom_count:
02003 raise RuntimeError('Chains in GetSuperposition not perfectly aligned!')
02004 return mol.alg.SuperposeSVD(chain_1, chain_2, apply_transform=False)
02005
02006 def GetMappedRMSD(self, chain_mapping, transformation):
02007 """
02008 :return: RMSD between complexes considering chain mapping.
02009 :param chain_mapping: See :attr:`QSscorer.chain_mapping`.
02010 :param transformation: Superposition transformation (e.g. res.transformation
02011 for res = :func:`GetSuperposition`).
02012 """
02013
02014 rmsds = []
02015 atoms = []
02016 for c1, c2 in chain_mapping.iteritems():
02017
02018 if (c1, c2) in self._pair_rmsd:
02019 atom_count, rmsd = self._pair_rmsd[(c1, c2)]
02020 else:
02021
02022 chain_1 = self.GetChainView1(c1)
02023 chain_2 = self.GetChainView2(c2)
02024 atom_count = chain_1.atom_count
02025 if atom_count != chain_2.atom_count:
02026 raise RuntimeError('Chains in GetMappedRMSD not perfectly aligned!')
02027
02028 rmsd = mol.alg.CalculateRMSD(chain_1, chain_2, transformation)
02029 self._pair_rmsd[(c1, c2)] = (atom_count, rmsd)
02030
02031 rmsds.append(rmsd)
02032 atoms.append(float(atom_count))
02033
02034 rmsd = np.sqrt( sum([a * r**2 for a, r in zip(atoms, rmsds)]) / sum(atoms) )
02035 return rmsd
02036
02037
02038 def _CleanUserSymmetry(symm, ent):
02039 """Clean-up user provided symmetry.
02040
02041 :param symm: Symmetry group as in :attr:`QSscorer.symm_1`
02042 :param ent: Entity from which to extract chain names
02043 :type ent: :class:`~ost.mol.EntityHandle` or :class:`~ost.mol.EntityView`
02044 :return: New symmetry group limited to chains in sub-structure *ent*
02045 (see :attr:`QSscorer.symm_1`). Empty list if invalid symmetry.
02046 """
02047
02048 chain_names = set([ch.name for ch in ent.chains])
02049 new_symm = list()
02050 for symm_group in symm:
02051 new_group = tuple(ch for ch in symm_group if ch in chain_names)
02052 if new_group:
02053 new_symm.append(new_group)
02054
02055 if new_symm != symm:
02056 LogInfo("Cleaned user symmetry for %s: %s" % (ent.GetName(), new_symm))
02057
02058 lengths = [len(symm_group) for symm_group in new_symm]
02059 if lengths[1:] != lengths[:-1]:
02060 LogWarning('User symmetry group of different sizes for %s. Ignoring it!' \
02061 % (ent.GetName()))
02062 return []
02063
02064 s_chain_names = set([ch for symm_group in new_symm for ch in symm_group])
02065 if len(s_chain_names) != sum(lengths):
02066 LogWarning('User symmetry group for %s has duplicate chains. Ignoring it!' \
02067 % (ent.GetName()))
02068 return []
02069 if s_chain_names != chain_names:
02070 LogWarning('User symmetry group for %s is missing chains. Ignoring it!' \
02071 % (ent.GetName()))
02072 return []
02073
02074 return new_symm
02075
02076 def _AreValidSymmetries(symm_1, symm_2):
02077 """Check symmetry pair for major problems.
02078
02079 :return: False if any of the two is empty or if they're compatible in size.
02080 :rtype: :class:`bool`
02081 """
02082 if not symm_1 or not symm_2:
02083 return False
02084 if len(symm_1) != 1 or len(symm_2) != 1:
02085 if not len(symm_1[0]) == len(symm_2[0]):
02086 LogWarning('Symmetry groups of different sizes. Ignoring them!')
02087 return False
02088 return True
02089
02090 def _GetMappedAlignments(ent_1, ent_2, chain_mapping):
02091 """
02092 :return: Alignments of 2 structures given chain mapping
02093 (see :attr:`QSscorer.alignments`).
02094 :param ent_1: Entity containing all chains in *chain_mapping.keys()*.
02095 Views to this entity attached to first sequence of each aln.
02096 :param ent_2: Entity containing all chains in *chain_mapping.values()*.
02097 Views to this entity attached to second sequence of each aln.
02098 :param chain_mapping: See :attr:`QSscorer.chain_mapping`
02099 """
02100 alns = []
02101 for ch_1_name in sorted(chain_mapping):
02102
02103 ch_1 = ent_1.FindChain(ch_1_name)
02104 seq_1 = seq.SequenceFromChain(ch_1.name, ch_1)
02105 ch_2 = ent_2.FindChain(chain_mapping[ch_1_name])
02106 seq_2 = seq.SequenceFromChain(ch_2.name, ch_2)
02107
02108 aln = _AlignAtomSeqs(seq_1, seq_2)
02109 if aln: alns.append(aln)
02110 return alns
02111
02112 def _GetMappedResidues(alns):
02113 """
02114 :return: Mapping of shared residues in *alns* (with views attached)
02115 (see :attr:`QSscorer.mapped_residues`).
02116 :param alns: See :attr:`QSscorer.alignments`
02117 """
02118 mapped_residues = dict()
02119 for aln in alns:
02120
02121 c1 = aln.GetSequence(0).name
02122 mapped_residues[c1] = dict()
02123
02124 v1, v2 = seq.ViewsFromAlignment(aln)
02125 for res_1, res_2 in zip(v1.residues, v2.residues):
02126 r1 = res_1.number.num
02127 r2 = res_2.number.num
02128 mapped_residues[c1][r1] = r2
02129
02130 return mapped_residues
02131
02132 def _GetExtraWeights(contacts, done, mapped_residues):
02133 """Return sum of extra weights for contacts of chains in set and not in done.
02134 :return: Tuple (weight_extra_mapped, weight_extra_all).
02135 weight_extra_mapped only sums if both cX,rX in mapped_residues
02136 weight_extra_all sums all
02137 :param contacts: See :func:`GetContacts` for first entity
02138 :param done: List of (c1, c2, r1, r2) tuples to ignore
02139 :param mapped_residues: See :func:`_GetMappedResidues`
02140 """
02141 weight_extra_mapped = 0
02142 weight_extra_non_mapped = 0
02143 for c1 in contacts:
02144 for c2 in contacts[c1]:
02145 for r1 in contacts[c1][c2]:
02146 for r2 in contacts[c1][c2][r1]:
02147 if (c1, c2, r1, r2) not in done:
02148 weight = _weight(contacts[c1][c2][r1][r2])
02149 if c1 in mapped_residues and r1 in mapped_residues[c1] \
02150 and c2 in mapped_residues and r2 in mapped_residues[c2]:
02151 weight_extra_mapped += weight
02152 else:
02153 weight_extra_non_mapped += weight
02154 return weight_extra_mapped, weight_extra_mapped + weight_extra_non_mapped
02155
02156 def _GetScores(contacts_1, contacts_2, mapped_residues, chain_mapping):
02157 """Get QS scores (see :class:`QSscorer`).
02158
02159 Note that if some chains are not to be considered at all, they must be removed
02160 from *contacts_1* / *contacts_2* prior to calling this.
02161
02162 :param contacts_1: See :func:`GetContacts` for first entity
02163 :param contacts_2: See :func:`GetContacts` for second entity
02164 :param mapped_residues: See :func:`_GetMappedResidues`
02165 :param chain_mapping: Maps any chain name in *mapped_residues* to chain name
02166 for *contacts_2*.
02167 :type chain_mapping: :class:`dict` (:class:`str` / :class:`str`)
02168 :return: Tuple (QS_best, QS_global) (see :attr:`QSscorer.best_score` and
02169 see :attr:`QSscorer.global_score`)
02170 """
02171
02172 done_1 = set()
02173 done_2 = set()
02174 weighted_scores = 0
02175 weight_sum = 0
02176
02177
02178 for c11 in contacts_1:
02179
02180 if c11 not in mapped_residues: continue
02181 c1T = chain_mapping[c11]
02182
02183 for c21 in contacts_1[c11]:
02184
02185 if c21 not in mapped_residues: continue
02186 c2T = chain_mapping[c21]
02187
02188 flipped_chains = (c1T > c2T)
02189 if flipped_chains:
02190 c12, c22 = c2T, c1T
02191 else:
02192 c12, c22 = c1T, c2T
02193
02194 if c12 not in contacts_2 or c22 not in contacts_2[c12]: continue
02195
02196 for r11 in contacts_1[c11][c21]:
02197
02198 if r11 not in mapped_residues[c11]: continue
02199 r1T = mapped_residues[c11][r11]
02200
02201 for r21 in contacts_1[c11][c21][r11]:
02202
02203 if r21 not in mapped_residues[c21]: continue
02204 r2T = mapped_residues[c21][r21]
02205
02206 if flipped_chains:
02207 r12, r22 = r2T, r1T
02208 else:
02209 r12, r22 = r1T, r2T
02210
02211 if r12 not in contacts_2[c12][c22]: continue
02212 if r22 not in contacts_2[c12][c22][r12]: continue
02213
02214 d1 = contacts_1[c11][c21][r11][r21]
02215 d2 = contacts_2[c12][c22][r12][r22]
02216 weight = _weight(min(d1, d2))
02217 weighted_scores += weight * (1 - abs(d1 - d2) / 12)
02218 weight_sum += weight
02219
02220 done_1.add((c11, c21, r11, r21))
02221 done_2.add((c12, c22, r12, r22))
02222
02223 LogVerbose("Shared contacts: %d" % len(done_1))
02224
02225
02226 weights_extra_1 = _GetExtraWeights(contacts_1, done_1, mapped_residues)
02227 mapped_residues_2 = dict()
02228 for c1 in mapped_residues:
02229 c2 = chain_mapping[c1]
02230 mapped_residues_2[c2] = set()
02231 for r1 in mapped_residues[c1]:
02232 mapped_residues_2[c2].add(mapped_residues[c1][r1])
02233 weights_extra_2 = _GetExtraWeights(contacts_2, done_2, mapped_residues_2)
02234 weight_extra_mapped = weights_extra_1[0] + weights_extra_2[0]
02235 weight_extra_all = weights_extra_1[1] + weights_extra_2[1]
02236
02237
02238 denom_best = weight_sum + weight_extra_mapped
02239 denom_all = weight_sum + weight_extra_all
02240 if denom_best == 0:
02241 QS_best = 0
02242 else:
02243 QS_best = weighted_scores / denom_best
02244 if denom_all == 0:
02245 QS_global = 0
02246 else:
02247 QS_global = weighted_scores / denom_all
02248 return QS_best, QS_global
02249
02250 def _weight(dist):
02251 """
02252 This weight expresses the probability of two residues to interact given the CB
02253 distance (from Xu et al. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2573399/)
02254 """
02255 if dist <= 5.0:
02256 return 1
02257 x = (dist-5.0)/4.28;
02258 return np.exp(-2*x*x)
02259
02260
02261 def _GetQsSuperposition(alns):
02262 """
02263 :return: Superposition result based on shared CA atoms in *alns*
02264 (with views attached) (see :attr:`QSscorer.superposition`).
02265 :param alns: See :attr:`QSscorer.alignments`
02266 """
02267
02268 if not alns:
02269 raise QSscoreError('No successful alignments for superposition!')
02270
02271 view_1 = alns[0].GetSequence(0).attached_view.CreateEmptyView()
02272 view_2 = alns[0].GetSequence(1).attached_view.CreateEmptyView()
02273
02274 for aln in alns:
02275 for col in aln:
02276 res_1 = col.GetResidue(0)
02277 res_2 = col.GetResidue(1)
02278 if res_1.IsValid() and res_2.IsValid():
02279 ca_1 = res_1.FindAtom('CA')
02280 ca_2 = res_2.FindAtom('CA')
02281 if ca_1.IsValid() and ca_2.IsValid():
02282 view_1.AddAtom(ca_1)
02283 view_2.AddAtom(ca_2)
02284
02285 res = mol.alg.SuperposeSVD(view_1, view_2, apply_transform=False)
02286 return res
02287
02288
02289 def _AddResidue(edi, res, rnum, chain, calpha_only):
02290 """
02291 Add residue *res* with res. num. *run* to given *chain* using editor *edi*.
02292 Either all atoms added or (if *calpha_only*) only CA.
02293 """
02294 if calpha_only:
02295 ca_atom = res.FindAtom('CA')
02296 if ca_atom.IsValid():
02297 new_res = edi.AppendResidue(chain, res.name, rnum)
02298 edi.InsertAtom(new_res, ca_atom.name, ca_atom.pos)
02299 else:
02300 new_res = edi.AppendResidue(chain, res.name, rnum)
02301 for atom in res.atoms:
02302 edi.InsertAtom(new_res, atom.name, atom.pos)
02303
02304 def _MergeAlignedChains(alns, ent_1, ent_2, calpha_only):
02305 """
02306 Create two new entities (based on the alignments attached views) where all
02307 residues have same numbering (when they're aligned) and they are all pushed to
02308 a single chain X. Also append extra chains contained in *ent_1* and *ent_2*
02309 but not contained in *alns*.
02310
02311 Used for :attr:`QSscorer.lddt_ref` and :attr:`QSscorer.lddt_mdl`
02312
02313 :param alns: List of alignments with attached views (first sequence: *ent_1*,
02314 second: *ent_2*). Residue number in single chain is column index
02315 of current alignment + sum of lengths of all previous alignments
02316 (order of alignments as in input list).
02317 :type alns: See :attr:`QSscorer.alignments`
02318 :param ent_1: First entity to process.
02319 :type ent_1: :class:`~ost.mol.EntityHandle`
02320 :param ent_2: Second entity to process.
02321 :type ent_2: :class:`~ost.mol.EntityHandle`
02322 :param calpha_only: If True, we only include CA atoms instead of all.
02323 :type calpha_only: :class:`bool`
02324
02325 :return: Tuple of two single chain entities (from *ent_1* and from *ent_2*)
02326 :rtype: :class:`tuple` of :class:`~ost.mol.EntityHandle`
02327 """
02328
02329 ent_ren_1 = mol.CreateEntity()
02330 ed_1 = ent_ren_1.EditXCS()
02331 new_chain_1 = ed_1.InsertChain('X')
02332
02333 ent_ren_2 = mol.CreateEntity()
02334 ed_2 = ent_ren_2.EditXCS()
02335 new_chain_2 = ed_2.InsertChain('X')
02336
02337 rnum = 0
02338 chain_done_1 = set()
02339 chain_done_2 = set()
02340 for aln in alns:
02341 chain_done_1.add(aln.GetSequence(0).name)
02342 chain_done_2.add(aln.GetSequence(1).name)
02343 for col in aln:
02344 rnum += 1
02345
02346 res_1 = col.GetResidue(0)
02347 if res_1.IsValid():
02348 _AddResidue(ed_1, res_1, rnum, new_chain_1, calpha_only)
02349 res_2 = col.GetResidue(1)
02350 if res_2.IsValid():
02351 _AddResidue(ed_2, res_2, rnum, new_chain_2, calpha_only)
02352
02353 for chain in ent_1.chains:
02354 if chain.name in chain_done_1:
02355 continue
02356 for res in chain.residues:
02357 rnum += 1
02358 _AddResidue(ed_1, res, rnum, new_chain_1, calpha_only)
02359 for chain in ent_2.chains:
02360 if chain.name in chain_done_2:
02361 continue
02362 for res in chain.residues:
02363 rnum += 1
02364 _AddResidue(ed_2, res, rnum, new_chain_2, calpha_only)
02365
02366 ent_ren_1.SetName(aln.GetSequence(0).GetAttachedView().GetName())
02367 ent_ren_2.SetName(aln.GetSequence(1).GetAttachedView().GetName())
02368
02369 if not conop.GetDefaultLib():
02370 raise RuntimeError("QSscore computation requires a compound library!")
02371 pr = conop.RuleBasedProcessor(conop.GetDefaultLib())
02372 pr.Process(ent_ren_1)
02373 ed_1.UpdateICS()
02374 pr.Process(ent_ren_2)
02375 ed_2.UpdateICS()
02376 return ent_ren_1, ent_ren_2
02377
02378 def _ComputeLDDTScore(ref, mdl):
02379 """
02380 :return: lDDT of *mdl* vs *ref* (see :attr:`QSscorer.lddt_score`).
02381 :param mdl: Reference entity (see :attr:`QSscorer.lddt_mdl`)
02382 :param ref: Model entity (see :attr:`QSscorer.lddt_ref`)
02383 """
02384
02385 LogInfo('Reference %s has: %s residues' % (ref.GetName(), ref.residue_count))
02386 LogInfo('Model %s has: %s residues' % (mdl.GetName(), mdl.residue_count))
02387
02388 lddt_score = mol.alg.LocalDistDiffTest(mdl.Select(''), ref.Select(''),
02389 2., 8., 'lddt')
02390 LogInfo('lDDT score: %.3f' % lddt_score)
02391
02392 for r in mdl.residues:
02393 if r.HasProp('lddt'):
02394 for a in r.atoms:
02395 a.SetBFactor(r.GetFloatProp('lddt'))
02396 else:
02397 for a in r.atoms:
02398 a.SetBFactor(0.0)
02399
02400 return lddt_score
02401
02402
02403 __all__ = ('QSscoreError', 'QSscorer', 'QSscoreEntity', 'FilterContacts',
02404 'GetContacts')