3 from scipy.spatial
import distance
9 """ Helper object for QS-score computation
11 Holds structural information and getters for interacting chains, i.e.
12 interfaces. Peptide residues are represented by their CB position
13 (CA for GLY) and nucleotides by C3'.
15 :param ent: Structure for QS score computation
16 :type ent: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
17 :param contact_d: Pairwise distance of residues to be considered as contacts
18 :type contact_d: :class:`float`
21 pep_query =
"(peptide=true and (aname=\"CB\" or (rname=\"GLY\" and aname=\"CA\")))"
22 nuc_query =
"(nucleotide=True and aname=\"C3'\")"
23 self.
_view = ent.Select(
" or ".join([pep_query, nuc_query]))
39 """ Processed structure
41 View that only contains representative atoms. That's CB for peptide
42 residues (CA for GLY) and C3' for nucleotides.
44 :type: :class:`ost.mol.EntityView`
50 """ Pairwise distance of residues to be considered as contacts
52 Given at :class:`QSEntity` construction
60 """ Chain names in :attr:`~view`
64 :type: :class:`list` of :class:`str`
67 self.
_chain_names = sorted([ch.name
for ch
in self.view.chains])
72 """ Pairs of chains in :attr:`~view` with at least one contact
74 :type: :class:`list` of :class:`tuples`
78 for x
in itertools.combinations(self.
chain_names, 2):
81 self._interacting_chains.append(x)
87 :param chain_name: Chain in :attr:`~view`
88 :type chain_name: :class:`str`
90 chain = self.view.FindChain(chain_name)
91 if not chain.IsValid():
92 raise RuntimeError(f
"view has no chain named \"{chain_name}\"")
96 """ Get sequence of chain
98 Returns sequence of specified chain as raw :class:`str`
100 :param chain_name: Chain in :attr:`~view`
101 :type chain_name: :class:`str`
105 s =
''.join([r.one_letter_code
for r
in ch.residues])
110 """ Get representative positions of chain
112 That's CB positions for peptide residues (CA for GLY) and C3' for
113 nucleotides. Returns positions as a numpy array of shape
116 :param chain_name: Chain in :attr:`~view`
117 :type chain_name: :class:`str`
119 if chain_name
not in self.
_pos:
121 pos = np.zeros((len(ch.residues), 3))
122 for i, r
in enumerate(ch.residues):
123 pos[i,:] = r.atoms[0].
GetPos().data
124 self.
_pos[chain_name] = pos
125 return self.
_pos[chain_name]
127 def PairDist(self, chain_name_one, chain_name_two):
128 """ Get pairwise distances between two chains
130 Returns distances as numpy array of shape (a, b).
131 Where a is the number of residues of the chain that comes BEFORE the
132 other in :attr:`~chain_names`
134 key = (min(chain_name_one, chain_name_two),
135 max(chain_name_one, chain_name_two))
143 """ Get min x,y,z cooridnates for given chain
145 Based on positions that are extracted with GetPos
147 :param chain_name: Chain in :attr:`~view`
148 :type chain_name: :class:`str`
155 """ Get max x,y,z cooridnates for given chain
157 Based on positions that are extracted with GetPos
159 :param chain_name: Chain in :attr:`~view`
160 :type chain_name: :class:`str`
167 """ Returns True if chains potentially interact
169 Based on crude collision detection. There is no guarantee
170 that they actually interact if True is returned. However,
171 if False is returned, they don't interact for sure.
173 :param chain_name_one: Chain in :attr:`~view`
174 :type chain_name_one: class:`str`
175 :param chain_name_two: Chain in :attr:`~view`
176 :type chain_name_two: class:`str`
182 if np.max(min_one - max_two) > self.
contact_d:
184 if np.max(min_two - max_one) > self.
contact_d:
190 Holds data relevant for QS-score computation. Formulas for QS scores:
194 - QS_best = weighted_scores / (weight_sum + weight_extra_mapped)
195 - QS_global = weighted_scores / (weight_sum + weight_extra_all)
196 -> weighted_scores = sum(w(min(d1,d2)) * (1 - abs(d1-d2)/12)) for shared
197 -> weight_sum = sum(w(min(d1,d2))) for shared
198 -> weight_extra_mapped = sum(w(d)) for all mapped but non-shared
199 -> weight_extra_all = sum(w(d)) for all non-shared
200 -> w(d) = 1 if d <= 5, exp(-2 * ((d-5.0)/4.28)^2) else
202 In the formulas above:
204 * "d": CA/CB-CA/CB distance of an "inter-chain contact" ("d1", "d2" for
206 * "mapped": we could map chains of two structures and align residues in
208 * "shared": pairs of residues which are "mapped" and have
209 "inter-chain contact" in both structures.
210 * "inter-chain contact": CB-CB pairs (CA for GLY) with distance <= 12 A
211 (fallback to CA-CA if :attr:`calpha_only` is True).
212 * "w(d)": weighting function (prob. of 2 res. to interact given CB distance)
213 from `Xu et al. 2009 <https://dx.doi.org/10.1016%2Fj.jmb.2008.06.002>`_.
215 def __init__(self, weighted_scores, weight_sum, weight_extra_mapped,
224 """ weighted_scores attribute as described in formula section above
226 :type: :class:`float`
232 """ weight_sum attribute as described in formula section above
234 :type: :class:`float`
240 """ weight_extra_mapped attribute as described in formula section above
242 :type: :class:`float`
248 """ weight_extra_all attribute as described in formula section above
250 :type: :class:`float`
256 """ QS_best - the actual score as described in formula section above
258 :type: :class:`float`
262 if denominator != 0.0:
263 return nominator/denominator
269 """ QS_global - the actual score as described in formula section above
271 :type: :class:`float`
275 if denominator != 0.0:
276 return nominator/denominator
282 """ Helper object to compute QS-score
284 Tightly integrated into the mechanisms from the chain_mapping module.
285 The prefered way to derive an object of type :class:`QSScorer` is through
286 the static constructor: :func:`~FromMappingResult`. Example score
287 computation including mapping:
291 from ost.mol.alg.qsscore import QSScorer
292 from ost.mol.alg.chain_mapping import ChainMapper
294 ent_1 = io.LoadPDB("path_to_assembly_1.pdb")
295 ent_2 = io.LoadPDB("path_to_assembly_2.pdb")
297 chain_mapper = ChainMapper(ent_1)
298 mapping_result = chain_mapper.GetlDDTMapping(ent_2)
299 qs_scorer = QSScorer.FromMappingResult(mapping_result)
300 score_result = qs_scorer.Score(mapping_result.mapping)
301 print("score:", score_result.QS_global)
303 QS-score computation in :func:`QSScorer.Score` implements caching.
304 Repeated computations with alternative chain mappings thus become faster.
306 :param target: Structure designated as "target". Can be fetched from
307 :class:`ost.mol.alg.chain_mapping.MappingResult`
308 :type target: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
309 :param chem_groups: Groups of chemically equivalent chains in *target*.
311 :class:`ost.mol.alg.chain_mapping.MappingResult`
312 :type chem_groups: :class:`list` of :class:`list` of :class:`str`
313 :param model: Structure designated as "model". Can be fetched from
314 :class:`ost.mol.alg.chain_mapping.MappingResult`
315 :type model: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
316 :param alns: Each alignment is accessible with ``alns[(t_chain,m_chain)]``.
317 First sequence is the sequence of the respective chain in
318 :attr:`~qsent1`, second sequence the one from :attr:`~qsent2`.
320 :class:`ost.mol.alg.chain_mapping.MappingResult`
321 :type alns: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
322 :class:`ost.seq.AlignmentHandle`
324 def __init__(self, target, chem_groups, model, alns, contact_d = 12.0):
329 chem_group_ch_names = list(itertools.chain.from_iterable(chem_groups))
330 if self._qsent1.chain_names != sorted(chem_group_ch_names):
331 raise RuntimeError(f
"Expect exact same chain names in chem_groups "
332 f
"and in target (which is processed to only "
333 f
"contain peptides/nucleotides). target: "
334 f
"{self._qsent1.chain_names}, chem_groups: "
335 f
"{chem_group_ch_names}")
361 """ The preferred way to get a :class:`QSScorer`
363 Static constructor that derives an object of type :class:`QSScorer`
364 using a :class:`ost.mol.alg.chain_mapping.MappingResult`
366 :param mapping_result: Data source
367 :type mapping_result: :class:`ost.mol.alg.chain_mapping.MappingResult`
369 qs_scorer =
QSScorer(mapping_result.target, mapping_result.chem_groups,
370 mapping_result.model, alns = mapping_result.alns)
375 """ Represents *target*
377 :type: :class:`QSEntity`
383 """ Groups of chemically equivalent chains in *target*
385 Provided at object construction
387 :type: :class:`list` of :class:`list` of :class:`str`
393 """ Represents *model*
395 :type: :class:`QSEntity`
401 """ Alignments between chains in :attr:`~qsent1` and :attr:`~qsent2`
403 Provided at object construction. Each alignment is accessible with
404 ``alns[(t_chain,m_chain)]``. First sequence is the sequence of the
405 respective chain in :attr:`~qsent1`, second sequence the one from
408 :type: :class:`dict` with key: :class:`tuple` of :class:`str`, value:
409 :class:`ost.seq.AlignmentHandle`
413 def Score(self, mapping, check=True):
414 """ Computes QS-score given chain mapping
416 Again, the preferred way is to get *mapping* is from an object
417 of type :class:`ost.mol.alg.chain_mapping.MappingResult`.
420 :attr:`ost.mol.alg.chain_mapping.MappingResult.mapping`
421 :type mapping: :class:`list` of :class:`list` of :class:`str`
422 :param check: Perform input checks, can be disabled for speed purposes
423 if you know what you're doing.
424 :type check: :class:`bool`
425 :returns: Result object of type :class:`QSScorerResult`
431 raise RuntimeError(
"Dimensions of self.chem_groups and mapping "
435 raise RuntimeError(
"Dimensions of self.chem_groups and "
436 "mapping must match")
438 for name
in itertools.chain.from_iterable(mapping):
439 if name
is not None and name
not in self.qsent2.chain_names:
440 raise RuntimeError(f
"Each chain in mapping must be present "
441 f
"in self.qsent2. No match for "
444 flat_mapping = dict()
446 flat_mapping.update({x: y
for x, y
in zip(a, b)
if y
is not None})
451 """ Computes QS-score only considering one interface
453 This only works for interfaces that are computed in :func:`Score`, i.e.
454 interfaces for which the alignments are set up correctly.
456 :param trg_ch1: Name of first interface chain in target
457 :type trg_ch1: :class:`str`
458 :param trg_ch2: Name of second interface chain in target
459 :type trg_ch2: :class:`str`
460 :param mdl_ch1: Name of first interface chain in model
461 :type mdl_ch1: :class:`str`
462 :param mdl_ch2: Name of second interface chain in model
463 :type mdl_ch2: :class:`str`
464 :returns: Result object of type :class:`QSScorerResult`
465 :raises: :class:`RuntimeError` if no aln for trg_ch1/mdl_ch1 or
466 trg_ch2/mdl_ch2 is available.
468 if (trg_ch1, mdl_ch1)
not in self.
alns:
469 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
470 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
471 f
"construct the QSScorer object from a "
472 f
"MappingResult and are trg_ch1 and mdl_ch1 "
473 f
"mapped to each other?")
474 if (trg_ch2, mdl_ch2)
not in self.
alns:
475 raise RuntimeError(f
"No aln between trg_ch1 ({trg_ch1}) and "
476 f
"mdl_ch1 ({mdl_ch1}) available. Did you "
477 f
"construct the QSScorer object from a "
478 f
"MappingResult and are trg_ch1 and mdl_ch1 "
479 f
"mapped to each other?")
480 trg_int = (trg_ch1, trg_ch2)
481 mdl_int = (mdl_ch1, mdl_ch2)
486 """ Same as :func:`Score` but with flat mapping
488 :param flat_mapping: Dictionary with target chain names as keys and
489 the mapped model chain names as value
490 :type flat_mapping: :class:`dict` with :class:`str` as key and value
491 :returns: Result object of type :class:`QSScorerResult`
494 weighted_scores = 0.0
496 weight_extra_mapped = 0.0
497 weight_extra_all = 0.0
500 processed_qsent2_interfaces = set()
502 for int1
in self.qsent1.interacting_chains:
503 if int1[0]
in flat_mapping
and int1[1]
in flat_mapping:
504 int2 = (flat_mapping[int1[0]], flat_mapping[int1[1]])
508 weight_extra_mapped += c
509 weight_extra_all += d
510 processed_qsent2_interfaces.add((min(int2[0], int2[1]),
511 max(int2[0], int2[1])))
516 r_flat_mapping = {v:k
for k,v
in flat_mapping.items()}
517 for int2
in self.qsent2.interacting_chains:
518 if int2
not in processed_qsent2_interfaces:
519 if int2[0]
in r_flat_mapping
and int2[1]
in r_flat_mapping:
520 int1 = (r_flat_mapping[int2[0]], r_flat_mapping[int2[1]])
524 weight_extra_mapped += c
525 weight_extra_all += d
529 return QSScorerResult(weighted_scores, weight_sum, weight_extra_mapped,
532 def _MappedInterfaceScores(self, int1, int2):
533 key_one = (int1, int2)
536 key_two = ((int1[1], int1[0]), (int2[1], int2[0]))
540 weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all = \
542 self.
_mapped_cache[key_one] = (weighted_scores, weight_sum, weight_extra_mapped,
544 return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
546 def _InterfaceScores(self, int1, int2):
548 d1 = self.qsent1.PairDist(int1[0], int1[1])
549 d2 = self.qsent2.PairDist(int2[0], int2[1])
553 if int1[0] > int1[1]:
555 if int2[0] > int2[1]:
559 mapped_indices_1_1, mapped_indices_1_2 = \
562 mapped_indices_2_1, mapped_indices_2_2 = \
568 assert(self.qsent1.contact_d == self.qsent2.contact_d)
569 contact_d = self.qsent1.contact_d
570 mapped_idx_grid_1 = np.ix_(mapped_indices_1_1, mapped_indices_2_1)
571 mapped_idx_grid_2 = np.ix_(mapped_indices_1_2, mapped_indices_2_2)
573 if mapped_idx_grid_1[0].shape[0] == 0
or mapped_idx_grid_2[0].shape[0] == 0:
578 shared_mask_d1 = np.full(d1.shape,
False, dtype=bool)
579 shared_mask_d2 = np.full(d2.shape,
False, dtype=bool)
580 mapped_nonshared_mask_d1 = np.full(d1.shape,
False, dtype=bool)
581 mapped_nonshared_mask_d2 = np.full(d2.shape,
False, dtype=bool)
582 if mapped_idx_grid_1[0].shape[0] == 0:
585 mapped_d1_contacts = np.full(d1.shape,
False, dtype=bool)
587 mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
588 mapped_nonshared_mask_d1[mapped_idx_grid_1] = mapped_d1_contacts
590 if mapped_idx_grid_2[0].shape[0] == 0:
593 mapped_d2_contacts = np.full(d2.shape,
False, dtype=bool)
595 mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
596 mapped_nonshared_mask_d2[mapped_idx_grid_2] = mapped_d2_contacts
597 shared_mask = np.full(mapped_d1_contacts.shape,
False, dtype=bool)
599 mapped_d1_contacts = d1[mapped_idx_grid_1] < contact_d
600 mapped_d2_contacts = d2[mapped_idx_grid_2] < contact_d
601 shared_mask = np.logical_and(mapped_d1_contacts, mapped_d2_contacts)
602 shared_mask_d1 = np.full(d1.shape,
False, dtype=bool)
603 shared_mask_d1[mapped_idx_grid_1] = shared_mask
604 shared_mask_d2 = np.full(d2.shape,
False, dtype=bool)
605 shared_mask_d2[mapped_idx_grid_2] = shared_mask
608 mapped_nonshared_mask_d1 = np.full(d1.shape,
False, dtype=bool)
609 mapped_nonshared_mask_d1[mapped_idx_grid_1] = \
610 np.logical_and(np.logical_not(shared_mask), mapped_d1_contacts)
611 mapped_nonshared_mask_d2 = np.full(d2.shape,
False, dtype=bool)
612 mapped_nonshared_mask_d2[mapped_idx_grid_2] = \
613 np.logical_and(np.logical_not(shared_mask), mapped_d2_contacts)
616 shared_d1 = d1[shared_mask_d1]
617 shared_d2 = d2[shared_mask_d2]
618 shared_min = np.minimum(shared_d1, shared_d2)
619 shared_abs_diff_div_12 = np.abs(np.subtract(shared_d1, shared_d2))/12.0
620 weight_term = np.ones(shared_min.shape[0])
621 bigger_5_mask = shared_min > 5.0
622 weights = np.exp(-2.0*np.square((shared_min[bigger_5_mask]-5.0)/4.28))
623 weight_term[bigger_5_mask] = weights
624 diff_term = np.subtract(np.ones(weight_term.shape[0]),
625 shared_abs_diff_div_12)
626 weighted_scores = np.sum(np.multiply(weight_term, diff_term))
627 weight_sum = np.sum(weight_term)
630 nonshared_contact_mask_d1 = np.logical_and(np.logical_not(shared_mask_d1),
632 contact_distances = d1[nonshared_contact_mask_d1]
633 bigger_5 = contact_distances[contact_distances > 5]
634 weight_extra_all = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
636 weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
638 nonshared_contact_mask_d2 = np.logical_and(np.logical_not(shared_mask_d2),
640 contact_distances = d2[nonshared_contact_mask_d2]
641 bigger_5 = contact_distances[contact_distances > 5]
642 weight_extra_all += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
644 weight_extra_all += contact_distances.shape[0] - bigger_5.shape[0]
647 contact_distances = d1[mapped_nonshared_mask_d1]
648 bigger_5 = contact_distances[contact_distances > 5]
649 weight_extra_mapped = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
651 weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
653 contact_distances = d2[mapped_nonshared_mask_d2]
654 bigger_5 = contact_distances[contact_distances > 5]
655 weight_extra_mapped += np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
657 weight_extra_mapped += contact_distances.shape[0] - bigger_5.shape[0]
659 return (weighted_scores, weight_sum, weight_extra_mapped, weight_extra_all)
661 def _IndexMapping(self, ch1, ch2):
662 """ Fetches aln and returns indices of (non-)aligned residues
664 returns 2 numpy arrays containing the indices of residues in
665 ch1 and ch2 which are aligned
667 mapped_indices_1 = list()
668 mapped_indices_2 = list()
671 for col
in self.
alns[(ch1, ch2)]:
672 if col[0] !=
'-' and col[1] !=
'-':
673 mapped_indices_1.append(idx_1)
674 mapped_indices_2.append(idx_2)
679 return (np.array(mapped_indices_1), np.array(mapped_indices_2))
681 def _InterfacePenalty1(self, interface):
687 def _InterfacePenalty2(self, interface):
693 def _InterfacePenalty(self, qsent, interface):
694 d = qsent.PairDist(interface[0], interface[1])
695 contact_distances = d[d < qsent.contact_d]
696 bigger_5 = contact_distances[contact_distances > 5]
697 penalty = np.sum(np.exp(-2.0*np.square((bigger_5-5.0)/4.28)))
699 penalty += contact_distances.shape[0] - bigger_5.shape[0]
703 __all__ = (
'QSEntity',
'QSScorer')
def _MappedInterfaceScores